1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 19 #include "llvm/Analysis/TypeMetadataUtils.h" 20 #include "llvm/Bitcode/BitcodeWriter.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/DebugInfo.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/Pass.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Transforms/IPO.h" 31 #include "llvm/Transforms/IPO/FunctionAttrs.h" 32 #include "llvm/Transforms/Utils/Cloning.h" 33 using namespace llvm; 34 35 namespace { 36 37 // Produce a unique identifier for this module by taking the MD5 sum of the 38 // names of the module's strong external symbols. This identifier is 39 // normally guaranteed to be unique, or the program would fail to link due to 40 // multiply defined symbols. 41 // 42 // If the module has no strong external symbols (such a module may still have a 43 // semantic effect if it performs global initialization), we cannot produce a 44 // unique identifier for this module, so we return the empty string, which 45 // causes the entire module to be written as a regular LTO module. 46 std::string getModuleId(Module *M) { 47 MD5 Md5; 48 bool ExportsSymbols = false; 49 for (auto &GV : M->global_values()) { 50 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 51 !GV.hasExternalLinkage()) 52 continue; 53 ExportsSymbols = true; 54 Md5.update(GV.getName()); 55 Md5.update(ArrayRef<uint8_t>{0}); 56 } 57 58 if (!ExportsSymbols) 59 return ""; 60 61 MD5::MD5Result R; 62 Md5.final(R); 63 64 SmallString<32> Str; 65 MD5::stringifyResult(R, Str); 66 return ("$" + Str).str(); 67 } 68 69 // Promote each local-linkage entity defined by ExportM and used by ImportM by 70 // changing visibility and appending the given ModuleId. 71 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 72 DenseMap<const Comdat *, Comdat *> RenamedComdats; 73 for (auto &ExportGV : ExportM.global_values()) { 74 if (!ExportGV.hasLocalLinkage()) 75 continue; 76 77 auto Name = ExportGV.getName(); 78 GlobalValue *ImportGV = ImportM.getNamedValue(Name); 79 if (!ImportGV || ImportGV->use_empty()) 80 continue; 81 82 std::string NewName = (Name + ModuleId).str(); 83 84 if (const auto *C = ExportGV.getComdat()) 85 if (C->getName() == Name) 86 RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName)); 87 88 ExportGV.setName(NewName); 89 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 90 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 91 92 ImportGV->setName(NewName); 93 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 94 } 95 96 if (!RenamedComdats.empty()) 97 for (auto &GO : ExportM.global_objects()) 98 if (auto *C = GO.getComdat()) { 99 auto Replacement = RenamedComdats.find(C); 100 if (Replacement != RenamedComdats.end()) 101 GO.setComdat(Replacement->second); 102 } 103 } 104 105 // Promote all internal (i.e. distinct) type ids used by the module by replacing 106 // them with external type ids formed using the module id. 107 // 108 // Note that this needs to be done before we clone the module because each clone 109 // will receive its own set of distinct metadata nodes. 110 void promoteTypeIds(Module &M, StringRef ModuleId) { 111 DenseMap<Metadata *, Metadata *> LocalToGlobal; 112 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 113 Metadata *MD = 114 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 115 116 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 117 Metadata *&GlobalMD = LocalToGlobal[MD]; 118 if (!GlobalMD) { 119 std::string NewName = 120 (to_string(LocalToGlobal.size()) + ModuleId).str(); 121 GlobalMD = MDString::get(M.getContext(), NewName); 122 } 123 124 CI->setArgOperand(ArgNo, 125 MetadataAsValue::get(M.getContext(), GlobalMD)); 126 } 127 }; 128 129 if (Function *TypeTestFunc = 130 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 131 for (const Use &U : TypeTestFunc->uses()) { 132 auto CI = cast<CallInst>(U.getUser()); 133 ExternalizeTypeId(CI, 1); 134 } 135 } 136 137 if (Function *TypeCheckedLoadFunc = 138 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 139 for (const Use &U : TypeCheckedLoadFunc->uses()) { 140 auto CI = cast<CallInst>(U.getUser()); 141 ExternalizeTypeId(CI, 2); 142 } 143 } 144 145 for (GlobalObject &GO : M.global_objects()) { 146 SmallVector<MDNode *, 1> MDs; 147 GO.getMetadata(LLVMContext::MD_type, MDs); 148 149 GO.eraseMetadata(LLVMContext::MD_type); 150 for (auto MD : MDs) { 151 auto I = LocalToGlobal.find(MD->getOperand(1)); 152 if (I == LocalToGlobal.end()) { 153 GO.addMetadata(LLVMContext::MD_type, *MD); 154 continue; 155 } 156 GO.addMetadata( 157 LLVMContext::MD_type, 158 *MDNode::get(M.getContext(), 159 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 160 } 161 } 162 } 163 164 // Drop unused globals, and drop type information from function declarations. 165 // FIXME: If we made functions typeless then there would be no need to do this. 166 void simplifyExternals(Module &M) { 167 FunctionType *EmptyFT = 168 FunctionType::get(Type::getVoidTy(M.getContext()), false); 169 170 for (auto I = M.begin(), E = M.end(); I != E;) { 171 Function &F = *I++; 172 if (F.isDeclaration() && F.use_empty()) { 173 F.eraseFromParent(); 174 continue; 175 } 176 177 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 178 continue; 179 180 Function *NewF = 181 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 182 NewF->setVisibility(F.getVisibility()); 183 NewF->takeName(&F); 184 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 185 F.eraseFromParent(); 186 } 187 188 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 189 GlobalVariable &GV = *I++; 190 if (GV.isDeclaration() && GV.use_empty()) { 191 GV.eraseFromParent(); 192 continue; 193 } 194 } 195 } 196 197 void filterModule( 198 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 199 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 200 I != E;) { 201 GlobalAlias *GA = &*I++; 202 if (ShouldKeepDefinition(GA)) 203 continue; 204 205 GlobalObject *GO; 206 if (GA->getValueType()->isFunctionTy()) 207 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 208 GlobalValue::ExternalLinkage, "", M); 209 else 210 GO = new GlobalVariable( 211 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 212 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 213 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 214 GO->takeName(GA); 215 GA->replaceAllUsesWith(GO); 216 GA->eraseFromParent(); 217 } 218 219 for (Function &F : *M) { 220 if (ShouldKeepDefinition(&F)) 221 continue; 222 223 F.deleteBody(); 224 F.setComdat(nullptr); 225 F.clearMetadata(); 226 } 227 228 for (GlobalVariable &GV : M->globals()) { 229 if (ShouldKeepDefinition(&GV)) 230 continue; 231 232 GV.setInitializer(nullptr); 233 GV.setLinkage(GlobalValue::ExternalLinkage); 234 GV.setComdat(nullptr); 235 GV.clearMetadata(); 236 } 237 } 238 239 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 240 if (auto *F = dyn_cast<Function>(C)) 241 return Fn(F); 242 if (isa<GlobalValue>(C)) 243 return; 244 for (Value *Op : C->operands()) 245 forEachVirtualFunction(cast<Constant>(Op), Fn); 246 } 247 248 // If it's possible to split M into regular and thin LTO parts, do so and write 249 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 250 // regular LTO bitcode file to OS. 251 void splitAndWriteThinLTOBitcode( 252 raw_ostream &OS, raw_ostream *ThinLinkOS, 253 function_ref<AAResults &(Function &)> AARGetter, Module &M) { 254 std::string ModuleId = getModuleId(&M); 255 if (ModuleId.empty()) { 256 // We couldn't generate a module ID for this module, just write it out as a 257 // regular LTO module. 258 WriteBitcodeToFile(&M, OS); 259 if (ThinLinkOS) 260 // We don't have a ThinLTO part, but still write the module to the 261 // ThinLinkOS if requested so that the expected output file is produced. 262 WriteBitcodeToFile(&M, *ThinLinkOS); 263 return; 264 } 265 266 promoteTypeIds(M, ModuleId); 267 268 // Returns whether a global has attached type metadata. Such globals may 269 // participate in CFI or whole-program devirtualization, so they need to 270 // appear in the merged module instead of the thin LTO module. 271 auto HasTypeMetadata = [&](const GlobalObject *GO) { 272 SmallVector<MDNode *, 1> MDs; 273 GO->getMetadata(LLVMContext::MD_type, MDs); 274 return !MDs.empty(); 275 }; 276 277 // Collect the set of virtual functions that are eligible for virtual constant 278 // propagation. Each eligible function must not access memory, must return 279 // an integer of width <=64 bits, must take at least one argument, must not 280 // use its first argument (assumed to be "this") and all arguments other than 281 // the first one must be of <=64 bit integer type. 282 // 283 // Note that we test whether this copy of the function is readnone, rather 284 // than testing function attributes, which must hold for any copy of the 285 // function, even a less optimized version substituted at link time. This is 286 // sound because the virtual constant propagation optimizations effectively 287 // inline all implementations of the virtual function into each call site, 288 // rather than using function attributes to perform local optimization. 289 std::set<const Function *> EligibleVirtualFns; 290 // If any member of a comdat lives in MergedM, put all members of that 291 // comdat in MergedM to keep the comdat together. 292 DenseSet<const Comdat *> MergedMComdats; 293 for (GlobalVariable &GV : M.globals()) 294 if (HasTypeMetadata(&GV)) { 295 if (const auto *C = GV.getComdat()) 296 MergedMComdats.insert(C); 297 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 298 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 299 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 300 !F->arg_begin()->use_empty()) 301 return; 302 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 303 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 304 if (!ArgT || ArgT->getBitWidth() > 64) 305 return; 306 } 307 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 308 EligibleVirtualFns.insert(F); 309 }); 310 } 311 312 ValueToValueMapTy VMap; 313 std::unique_ptr<Module> MergedM( 314 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 315 if (const auto *C = GV->getComdat()) 316 if (MergedMComdats.count(C)) 317 return true; 318 if (auto *F = dyn_cast<Function>(GV)) 319 return EligibleVirtualFns.count(F); 320 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 321 return HasTypeMetadata(GVar); 322 return false; 323 })); 324 StripDebugInfo(*MergedM); 325 326 for (Function &F : *MergedM) 327 if (!F.isDeclaration()) { 328 // Reset the linkage of all functions eligible for virtual constant 329 // propagation. The canonical definitions live in the thin LTO module so 330 // that they can be imported. 331 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 332 F.setComdat(nullptr); 333 } 334 335 // Remove all globals with type metadata, globals with comdats that live in 336 // MergedM, and aliases pointing to such globals from the thin LTO module. 337 filterModule(&M, [&](const GlobalValue *GV) { 338 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 339 if (HasTypeMetadata(GVar)) 340 return false; 341 if (const auto *C = GV->getComdat()) 342 if (MergedMComdats.count(C)) 343 return false; 344 return true; 345 }); 346 347 promoteInternals(*MergedM, M, ModuleId); 348 promoteInternals(M, *MergedM, ModuleId); 349 350 simplifyExternals(*MergedM); 351 352 353 // FIXME: Try to re-use BSI and PFI from the original module here. 354 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 355 356 SmallVector<char, 0> Buffer; 357 358 BitcodeWriter W(Buffer); 359 // Save the module hash produced for the full bitcode, which will 360 // be used in the backends, and use that in the minimized bitcode 361 // produced for the full link. 362 ModuleHash ModHash = {{0}}; 363 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 364 /*GenerateHash=*/true, &ModHash); 365 W.writeModule(MergedM.get()); 366 W.writeStrtab(); 367 OS << Buffer; 368 369 // If a minimized bitcode module was requested for the thin link, 370 // strip the debug info (the merged module was already stripped above) 371 // and write it to the given OS. 372 if (ThinLinkOS) { 373 Buffer.clear(); 374 BitcodeWriter W2(Buffer); 375 StripDebugInfo(M); 376 W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 377 /*GenerateHash=*/false, &ModHash); 378 W2.writeModule(MergedM.get()); 379 W2.writeStrtab(); 380 *ThinLinkOS << Buffer; 381 } 382 } 383 384 // Returns whether this module needs to be split because it uses type metadata. 385 bool requiresSplit(Module &M) { 386 SmallVector<MDNode *, 1> MDs; 387 for (auto &GO : M.global_objects()) { 388 GO.getMetadata(LLVMContext::MD_type, MDs); 389 if (!MDs.empty()) 390 return true; 391 } 392 393 return false; 394 } 395 396 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, 397 function_ref<AAResults &(Function &)> AARGetter, 398 Module &M, const ModuleSummaryIndex *Index) { 399 // See if this module has any type metadata. If so, we need to split it. 400 if (requiresSplit(M)) 401 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); 402 403 // Otherwise we can just write it out as a regular module. 404 405 // Save the module hash produced for the full bitcode, which will 406 // be used in the backends, and use that in the minimized bitcode 407 // produced for the full link. 408 ModuleHash ModHash = {{0}}; 409 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 410 /*GenerateHash=*/true, &ModHash); 411 // If a minimized bitcode module was requested for the thin link, 412 // strip the debug info and write it to the given OS. 413 if (ThinLinkOS) { 414 StripDebugInfo(M); 415 WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, 416 Index, 417 /*GenerateHash=*/false, &ModHash); 418 } 419 } 420 421 class WriteThinLTOBitcode : public ModulePass { 422 raw_ostream &OS; // raw_ostream to print on 423 // The output stream on which to emit a minimized module for use 424 // just in the thin link, if requested. 425 raw_ostream *ThinLinkOS; 426 427 public: 428 static char ID; // Pass identification, replacement for typeid 429 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { 430 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 431 } 432 433 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) 434 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { 435 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 436 } 437 438 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 439 440 bool runOnModule(Module &M) override { 441 const ModuleSummaryIndex *Index = 442 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 443 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); 444 return true; 445 } 446 void getAnalysisUsage(AnalysisUsage &AU) const override { 447 AU.setPreservesAll(); 448 AU.addRequired<AssumptionCacheTracker>(); 449 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 450 AU.addRequired<TargetLibraryInfoWrapperPass>(); 451 } 452 }; 453 } // anonymous namespace 454 455 char WriteThinLTOBitcode::ID = 0; 456 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 457 "Write ThinLTO Bitcode", false, true) 458 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 459 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 460 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 461 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 462 "Write ThinLTO Bitcode", false, true) 463 464 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, 465 raw_ostream *ThinLinkOS) { 466 return new WriteThinLTOBitcode(Str, ThinLinkOS); 467 } 468