1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" 11 #include "llvm/Analysis/BasicAliasAnalysis.h" 12 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 13 #include "llvm/Analysis/ProfileSummaryInfo.h" 14 #include "llvm/Analysis/TypeMetadataUtils.h" 15 #include "llvm/Bitcode/BitcodeWriter.h" 16 #include "llvm/IR/Constants.h" 17 #include "llvm/IR/DebugInfo.h" 18 #include "llvm/IR/Intrinsics.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/IR/PassManager.h" 21 #include "llvm/Pass.h" 22 #include "llvm/Support/ScopedPrinter.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include "llvm/Transforms/IPO.h" 25 #include "llvm/Transforms/IPO/FunctionAttrs.h" 26 #include "llvm/Transforms/Utils/Cloning.h" 27 #include "llvm/Transforms/Utils/ModuleUtils.h" 28 using namespace llvm; 29 30 namespace { 31 32 // Promote each local-linkage entity defined by ExportM and used by ImportM by 33 // changing visibility and appending the given ModuleId. 34 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, 35 SetVector<GlobalValue *> &PromoteExtra) { 36 DenseMap<const Comdat *, Comdat *> RenamedComdats; 37 for (auto &ExportGV : ExportM.global_values()) { 38 if (!ExportGV.hasLocalLinkage()) 39 continue; 40 41 auto Name = ExportGV.getName(); 42 GlobalValue *ImportGV = nullptr; 43 if (!PromoteExtra.count(&ExportGV)) { 44 ImportGV = ImportM.getNamedValue(Name); 45 if (!ImportGV) 46 continue; 47 ImportGV->removeDeadConstantUsers(); 48 if (ImportGV->use_empty()) { 49 ImportGV->eraseFromParent(); 50 continue; 51 } 52 } 53 54 std::string NewName = (Name + ModuleId).str(); 55 56 if (const auto *C = ExportGV.getComdat()) 57 if (C->getName() == Name) 58 RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName)); 59 60 ExportGV.setName(NewName); 61 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 62 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 63 64 if (ImportGV) { 65 ImportGV->setName(NewName); 66 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 67 } 68 } 69 70 if (!RenamedComdats.empty()) 71 for (auto &GO : ExportM.global_objects()) 72 if (auto *C = GO.getComdat()) { 73 auto Replacement = RenamedComdats.find(C); 74 if (Replacement != RenamedComdats.end()) 75 GO.setComdat(Replacement->second); 76 } 77 } 78 79 // Promote all internal (i.e. distinct) type ids used by the module by replacing 80 // them with external type ids formed using the module id. 81 // 82 // Note that this needs to be done before we clone the module because each clone 83 // will receive its own set of distinct metadata nodes. 84 void promoteTypeIds(Module &M, StringRef ModuleId) { 85 DenseMap<Metadata *, Metadata *> LocalToGlobal; 86 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 87 Metadata *MD = 88 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 89 90 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 91 Metadata *&GlobalMD = LocalToGlobal[MD]; 92 if (!GlobalMD) { 93 std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str(); 94 GlobalMD = MDString::get(M.getContext(), NewName); 95 } 96 97 CI->setArgOperand(ArgNo, 98 MetadataAsValue::get(M.getContext(), GlobalMD)); 99 } 100 }; 101 102 if (Function *TypeTestFunc = 103 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 104 for (const Use &U : TypeTestFunc->uses()) { 105 auto CI = cast<CallInst>(U.getUser()); 106 ExternalizeTypeId(CI, 1); 107 } 108 } 109 110 if (Function *TypeCheckedLoadFunc = 111 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 112 for (const Use &U : TypeCheckedLoadFunc->uses()) { 113 auto CI = cast<CallInst>(U.getUser()); 114 ExternalizeTypeId(CI, 2); 115 } 116 } 117 118 for (GlobalObject &GO : M.global_objects()) { 119 SmallVector<MDNode *, 1> MDs; 120 GO.getMetadata(LLVMContext::MD_type, MDs); 121 122 GO.eraseMetadata(LLVMContext::MD_type); 123 for (auto MD : MDs) { 124 auto I = LocalToGlobal.find(MD->getOperand(1)); 125 if (I == LocalToGlobal.end()) { 126 GO.addMetadata(LLVMContext::MD_type, *MD); 127 continue; 128 } 129 GO.addMetadata( 130 LLVMContext::MD_type, 131 *MDNode::get(M.getContext(), 132 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 133 } 134 } 135 } 136 137 // Drop unused globals, and drop type information from function declarations. 138 // FIXME: If we made functions typeless then there would be no need to do this. 139 void simplifyExternals(Module &M) { 140 FunctionType *EmptyFT = 141 FunctionType::get(Type::getVoidTy(M.getContext()), false); 142 143 for (auto I = M.begin(), E = M.end(); I != E;) { 144 Function &F = *I++; 145 if (F.isDeclaration() && F.use_empty()) { 146 F.eraseFromParent(); 147 continue; 148 } 149 150 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT || 151 // Changing the type of an intrinsic may invalidate the IR. 152 F.getName().startswith("llvm.")) 153 continue; 154 155 Function *NewF = 156 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 157 NewF->setVisibility(F.getVisibility()); 158 NewF->takeName(&F); 159 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 160 F.eraseFromParent(); 161 } 162 163 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 164 GlobalVariable &GV = *I++; 165 if (GV.isDeclaration() && GV.use_empty()) { 166 GV.eraseFromParent(); 167 continue; 168 } 169 } 170 } 171 172 void filterModule( 173 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 174 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 175 I != E;) { 176 GlobalAlias *GA = &*I++; 177 if (ShouldKeepDefinition(GA)) 178 continue; 179 180 GlobalObject *GO; 181 if (GA->getValueType()->isFunctionTy()) 182 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 183 GlobalValue::ExternalLinkage, "", M); 184 else 185 GO = new GlobalVariable( 186 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 187 nullptr, "", nullptr, 188 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 189 GO->takeName(GA); 190 GA->replaceAllUsesWith(GO); 191 GA->eraseFromParent(); 192 } 193 194 for (Function &F : *M) { 195 if (ShouldKeepDefinition(&F)) 196 continue; 197 198 F.deleteBody(); 199 F.setComdat(nullptr); 200 F.clearMetadata(); 201 } 202 203 for (GlobalVariable &GV : M->globals()) { 204 if (ShouldKeepDefinition(&GV)) 205 continue; 206 207 GV.setInitializer(nullptr); 208 GV.setLinkage(GlobalValue::ExternalLinkage); 209 GV.setComdat(nullptr); 210 GV.clearMetadata(); 211 } 212 } 213 214 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 215 if (auto *F = dyn_cast<Function>(C)) 216 return Fn(F); 217 if (isa<GlobalValue>(C)) 218 return; 219 for (Value *Op : C->operands()) 220 forEachVirtualFunction(cast<Constant>(Op), Fn); 221 } 222 223 // If it's possible to split M into regular and thin LTO parts, do so and write 224 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 225 // regular LTO bitcode file to OS. 226 void splitAndWriteThinLTOBitcode( 227 raw_ostream &OS, raw_ostream *ThinLinkOS, 228 function_ref<AAResults &(Function &)> AARGetter, Module &M) { 229 std::string ModuleId = getUniqueModuleId(&M); 230 if (ModuleId.empty()) { 231 // We couldn't generate a module ID for this module, just write it out as a 232 // regular LTO module. 233 WriteBitcodeToFile(&M, OS); 234 if (ThinLinkOS) 235 // We don't have a ThinLTO part, but still write the module to the 236 // ThinLinkOS if requested so that the expected output file is produced. 237 WriteBitcodeToFile(&M, *ThinLinkOS); 238 return; 239 } 240 241 promoteTypeIds(M, ModuleId); 242 243 // Returns whether a global has attached type metadata. Such globals may 244 // participate in CFI or whole-program devirtualization, so they need to 245 // appear in the merged module instead of the thin LTO module. 246 auto HasTypeMetadata = [&](const GlobalObject *GO) { 247 SmallVector<MDNode *, 1> MDs; 248 GO->getMetadata(LLVMContext::MD_type, MDs); 249 return !MDs.empty(); 250 }; 251 252 // Collect the set of virtual functions that are eligible for virtual constant 253 // propagation. Each eligible function must not access memory, must return 254 // an integer of width <=64 bits, must take at least one argument, must not 255 // use its first argument (assumed to be "this") and all arguments other than 256 // the first one must be of <=64 bit integer type. 257 // 258 // Note that we test whether this copy of the function is readnone, rather 259 // than testing function attributes, which must hold for any copy of the 260 // function, even a less optimized version substituted at link time. This is 261 // sound because the virtual constant propagation optimizations effectively 262 // inline all implementations of the virtual function into each call site, 263 // rather than using function attributes to perform local optimization. 264 std::set<const Function *> EligibleVirtualFns; 265 // If any member of a comdat lives in MergedM, put all members of that 266 // comdat in MergedM to keep the comdat together. 267 DenseSet<const Comdat *> MergedMComdats; 268 for (GlobalVariable &GV : M.globals()) 269 if (HasTypeMetadata(&GV)) { 270 if (const auto *C = GV.getComdat()) 271 MergedMComdats.insert(C); 272 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 273 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 274 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 275 !F->arg_begin()->use_empty()) 276 return; 277 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 278 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 279 if (!ArgT || ArgT->getBitWidth() > 64) 280 return; 281 } 282 if (!F->isDeclaration() && 283 computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 284 EligibleVirtualFns.insert(F); 285 }); 286 } 287 288 ValueToValueMapTy VMap; 289 std::unique_ptr<Module> MergedM( 290 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 291 if (const auto *C = GV->getComdat()) 292 if (MergedMComdats.count(C)) 293 return true; 294 if (auto *F = dyn_cast<Function>(GV)) 295 return EligibleVirtualFns.count(F); 296 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 297 return HasTypeMetadata(GVar); 298 return false; 299 })); 300 StripDebugInfo(*MergedM); 301 MergedM->setModuleInlineAsm(""); 302 303 for (Function &F : *MergedM) 304 if (!F.isDeclaration()) { 305 // Reset the linkage of all functions eligible for virtual constant 306 // propagation. The canonical definitions live in the thin LTO module so 307 // that they can be imported. 308 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 309 F.setComdat(nullptr); 310 } 311 312 SetVector<GlobalValue *> CfiFunctions; 313 for (auto &F : M) 314 if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) 315 CfiFunctions.insert(&F); 316 317 // Remove all globals with type metadata, globals with comdats that live in 318 // MergedM, and aliases pointing to such globals from the thin LTO module. 319 filterModule(&M, [&](const GlobalValue *GV) { 320 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 321 if (HasTypeMetadata(GVar)) 322 return false; 323 if (const auto *C = GV->getComdat()) 324 if (MergedMComdats.count(C)) 325 return false; 326 return true; 327 }); 328 329 promoteInternals(*MergedM, M, ModuleId, CfiFunctions); 330 promoteInternals(M, *MergedM, ModuleId, CfiFunctions); 331 332 SmallVector<MDNode *, 8> CfiFunctionMDs; 333 for (auto V : CfiFunctions) { 334 Function &F = *cast<Function>(V); 335 SmallVector<MDNode *, 2> Types; 336 F.getMetadata(LLVMContext::MD_type, Types); 337 338 auto &Ctx = MergedM->getContext(); 339 SmallVector<Metadata *, 4> Elts; 340 Elts.push_back(MDString::get(Ctx, F.getName())); 341 CfiFunctionLinkage Linkage; 342 if (!F.isDeclarationForLinker()) 343 Linkage = CFL_Definition; 344 else if (F.isWeakForLinker()) 345 Linkage = CFL_WeakDeclaration; 346 else 347 Linkage = CFL_Declaration; 348 Elts.push_back(ConstantAsMetadata::get( 349 llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); 350 for (auto Type : Types) 351 Elts.push_back(Type); 352 CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); 353 } 354 355 if(!CfiFunctionMDs.empty()) { 356 NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); 357 for (auto MD : CfiFunctionMDs) 358 NMD->addOperand(MD); 359 } 360 361 SmallVector<MDNode *, 8> FunctionAliases; 362 for (auto &A : M.aliases()) { 363 if (!isa<Function>(A.getAliasee())) 364 continue; 365 366 auto *F = cast<Function>(A.getAliasee()); 367 auto &Ctx = MergedM->getContext(); 368 SmallVector<Metadata *, 4> Elts; 369 370 Elts.push_back(MDString::get(Ctx, A.getName())); 371 Elts.push_back(MDString::get(Ctx, F->getName())); 372 Elts.push_back(ConstantAsMetadata::get( 373 llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.getVisibility()))); 374 Elts.push_back(ConstantAsMetadata::get( 375 llvm::ConstantInt::get(Type::getInt8Ty(Ctx), A.isWeakForLinker()))); 376 377 FunctionAliases.push_back(MDTuple::get(Ctx, Elts)); 378 } 379 380 if (!FunctionAliases.empty()) { 381 NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("aliases"); 382 for (auto MD : FunctionAliases) 383 NMD->addOperand(MD); 384 } 385 386 simplifyExternals(*MergedM); 387 388 // FIXME: Try to re-use BSI and PFI from the original module here. 389 ProfileSummaryInfo PSI(M); 390 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); 391 392 // Mark the merged module as requiring full LTO. We still want an index for 393 // it though, so that it can participate in summary-based dead stripping. 394 MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); 395 ModuleSummaryIndex MergedMIndex = 396 buildModuleSummaryIndex(*MergedM, nullptr, &PSI); 397 398 SmallVector<char, 0> Buffer; 399 400 BitcodeWriter W(Buffer); 401 // Save the module hash produced for the full bitcode, which will 402 // be used in the backends, and use that in the minimized bitcode 403 // produced for the full link. 404 ModuleHash ModHash = {{0}}; 405 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 406 /*GenerateHash=*/true, &ModHash); 407 W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, 408 &MergedMIndex); 409 W.writeSymtab(); 410 W.writeStrtab(); 411 OS << Buffer; 412 413 // If a minimized bitcode module was requested for the thin link, only 414 // the information that is needed by thin link will be written in the 415 // given OS (the merged module will be written as usual). 416 if (ThinLinkOS) { 417 Buffer.clear(); 418 BitcodeWriter W2(Buffer); 419 StripDebugInfo(M); 420 W2.writeThinLinkBitcode(&M, Index, ModHash); 421 W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, 422 &MergedMIndex); 423 W2.writeSymtab(); 424 W2.writeStrtab(); 425 *ThinLinkOS << Buffer; 426 } 427 } 428 429 // Returns whether this module needs to be split because it uses type metadata. 430 bool requiresSplit(Module &M) { 431 SmallVector<MDNode *, 1> MDs; 432 for (auto &GO : M.global_objects()) { 433 GO.getMetadata(LLVMContext::MD_type, MDs); 434 if (!MDs.empty()) 435 return true; 436 } 437 438 return false; 439 } 440 441 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, 442 function_ref<AAResults &(Function &)> AARGetter, 443 Module &M, const ModuleSummaryIndex *Index) { 444 // See if this module has any type metadata. If so, we need to split it. 445 if (requiresSplit(M)) 446 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); 447 448 // Otherwise we can just write it out as a regular module. 449 450 // Save the module hash produced for the full bitcode, which will 451 // be used in the backends, and use that in the minimized bitcode 452 // produced for the full link. 453 ModuleHash ModHash = {{0}}; 454 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 455 /*GenerateHash=*/true, &ModHash); 456 // If a minimized bitcode module was requested for the thin link, only 457 // the information that is needed by thin link will be written in the 458 // given OS. 459 if (ThinLinkOS && Index) 460 WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash); 461 } 462 463 class WriteThinLTOBitcode : public ModulePass { 464 raw_ostream &OS; // raw_ostream to print on 465 // The output stream on which to emit a minimized module for use 466 // just in the thin link, if requested. 467 raw_ostream *ThinLinkOS; 468 469 public: 470 static char ID; // Pass identification, replacement for typeid 471 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { 472 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 473 } 474 475 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) 476 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { 477 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 478 } 479 480 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 481 482 bool runOnModule(Module &M) override { 483 const ModuleSummaryIndex *Index = 484 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 485 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); 486 return true; 487 } 488 void getAnalysisUsage(AnalysisUsage &AU) const override { 489 AU.setPreservesAll(); 490 AU.addRequired<AssumptionCacheTracker>(); 491 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 492 AU.addRequired<TargetLibraryInfoWrapperPass>(); 493 } 494 }; 495 } // anonymous namespace 496 497 char WriteThinLTOBitcode::ID = 0; 498 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 499 "Write ThinLTO Bitcode", false, true) 500 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 501 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 502 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 503 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 504 "Write ThinLTO Bitcode", false, true) 505 506 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, 507 raw_ostream *ThinLinkOS) { 508 return new WriteThinLTOBitcode(Str, ThinLinkOS); 509 } 510 511 PreservedAnalyses 512 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { 513 FunctionAnalysisManager &FAM = 514 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 515 writeThinLTOBitcode(OS, ThinLinkOS, 516 [&FAM](Function &F) -> AAResults & { 517 return FAM.getResult<AAManager>(F); 518 }, 519 M, &AM.getResult<ModuleSummaryIndexAnalysis>(M)); 520 return PreservedAnalyses::all(); 521 } 522