1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" 11 #include "llvm/Analysis/BasicAliasAnalysis.h" 12 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 13 #include "llvm/Analysis/ProfileSummaryInfo.h" 14 #include "llvm/Analysis/TypeMetadataUtils.h" 15 #include "llvm/Bitcode/BitcodeWriter.h" 16 #include "llvm/IR/Constants.h" 17 #include "llvm/IR/DebugInfo.h" 18 #include "llvm/IR/Intrinsics.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/IR/PassManager.h" 21 #include "llvm/Pass.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/ScopedPrinter.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Transforms/IPO.h" 26 #include "llvm/Transforms/IPO/FunctionAttrs.h" 27 #include "llvm/Transforms/Utils/Cloning.h" 28 #include "llvm/Transforms/Utils/ModuleUtils.h" 29 using namespace llvm; 30 31 namespace { 32 33 // Promote each local-linkage entity defined by ExportM and used by ImportM by 34 // changing visibility and appending the given ModuleId. 35 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, 36 SetVector<GlobalValue *> &PromoteExtra) { 37 DenseMap<const Comdat *, Comdat *> RenamedComdats; 38 for (auto &ExportGV : ExportM.global_values()) { 39 if (!ExportGV.hasLocalLinkage()) 40 continue; 41 42 auto Name = ExportGV.getName(); 43 GlobalValue *ImportGV = nullptr; 44 if (!PromoteExtra.count(&ExportGV)) { 45 ImportGV = ImportM.getNamedValue(Name); 46 if (!ImportGV) 47 continue; 48 ImportGV->removeDeadConstantUsers(); 49 if (ImportGV->use_empty()) { 50 ImportGV->eraseFromParent(); 51 continue; 52 } 53 } 54 55 std::string NewName = (Name + ModuleId).str(); 56 57 if (const auto *C = ExportGV.getComdat()) 58 if (C->getName() == Name) 59 RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName)); 60 61 ExportGV.setName(NewName); 62 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 63 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 64 65 if (ImportGV) { 66 ImportGV->setName(NewName); 67 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 68 } 69 } 70 71 if (!RenamedComdats.empty()) 72 for (auto &GO : ExportM.global_objects()) 73 if (auto *C = GO.getComdat()) { 74 auto Replacement = RenamedComdats.find(C); 75 if (Replacement != RenamedComdats.end()) 76 GO.setComdat(Replacement->second); 77 } 78 } 79 80 // Promote all internal (i.e. distinct) type ids used by the module by replacing 81 // them with external type ids formed using the module id. 82 // 83 // Note that this needs to be done before we clone the module because each clone 84 // will receive its own set of distinct metadata nodes. 85 void promoteTypeIds(Module &M, StringRef ModuleId) { 86 DenseMap<Metadata *, Metadata *> LocalToGlobal; 87 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 88 Metadata *MD = 89 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 90 91 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 92 Metadata *&GlobalMD = LocalToGlobal[MD]; 93 if (!GlobalMD) { 94 std::string NewName = 95 (to_string(LocalToGlobal.size()) + ModuleId).str(); 96 GlobalMD = MDString::get(M.getContext(), NewName); 97 } 98 99 CI->setArgOperand(ArgNo, 100 MetadataAsValue::get(M.getContext(), GlobalMD)); 101 } 102 }; 103 104 if (Function *TypeTestFunc = 105 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 106 for (const Use &U : TypeTestFunc->uses()) { 107 auto CI = cast<CallInst>(U.getUser()); 108 ExternalizeTypeId(CI, 1); 109 } 110 } 111 112 if (Function *TypeCheckedLoadFunc = 113 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 114 for (const Use &U : TypeCheckedLoadFunc->uses()) { 115 auto CI = cast<CallInst>(U.getUser()); 116 ExternalizeTypeId(CI, 2); 117 } 118 } 119 120 for (GlobalObject &GO : M.global_objects()) { 121 SmallVector<MDNode *, 1> MDs; 122 GO.getMetadata(LLVMContext::MD_type, MDs); 123 124 GO.eraseMetadata(LLVMContext::MD_type); 125 for (auto MD : MDs) { 126 auto I = LocalToGlobal.find(MD->getOperand(1)); 127 if (I == LocalToGlobal.end()) { 128 GO.addMetadata(LLVMContext::MD_type, *MD); 129 continue; 130 } 131 GO.addMetadata( 132 LLVMContext::MD_type, 133 *MDNode::get(M.getContext(), 134 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 135 } 136 } 137 } 138 139 // Drop unused globals, and drop type information from function declarations. 140 // FIXME: If we made functions typeless then there would be no need to do this. 141 void simplifyExternals(Module &M) { 142 FunctionType *EmptyFT = 143 FunctionType::get(Type::getVoidTy(M.getContext()), false); 144 145 for (auto I = M.begin(), E = M.end(); I != E;) { 146 Function &F = *I++; 147 if (F.isDeclaration() && F.use_empty()) { 148 F.eraseFromParent(); 149 continue; 150 } 151 152 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT || 153 // Changing the type of an intrinsic may invalidate the IR. 154 F.getName().startswith("llvm.")) 155 continue; 156 157 Function *NewF = 158 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 159 NewF->setVisibility(F.getVisibility()); 160 NewF->takeName(&F); 161 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 162 F.eraseFromParent(); 163 } 164 165 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 166 GlobalVariable &GV = *I++; 167 if (GV.isDeclaration() && GV.use_empty()) { 168 GV.eraseFromParent(); 169 continue; 170 } 171 } 172 } 173 174 void filterModule( 175 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 176 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 177 I != E;) { 178 GlobalAlias *GA = &*I++; 179 if (ShouldKeepDefinition(GA)) 180 continue; 181 182 GlobalObject *GO; 183 if (GA->getValueType()->isFunctionTy()) 184 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 185 GlobalValue::ExternalLinkage, "", M); 186 else 187 GO = new GlobalVariable( 188 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 189 nullptr, "", nullptr, 190 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 191 GO->takeName(GA); 192 GA->replaceAllUsesWith(GO); 193 GA->eraseFromParent(); 194 } 195 196 for (Function &F : *M) { 197 if (ShouldKeepDefinition(&F)) 198 continue; 199 200 F.deleteBody(); 201 F.setComdat(nullptr); 202 F.clearMetadata(); 203 } 204 205 for (GlobalVariable &GV : M->globals()) { 206 if (ShouldKeepDefinition(&GV)) 207 continue; 208 209 GV.setInitializer(nullptr); 210 GV.setLinkage(GlobalValue::ExternalLinkage); 211 GV.setComdat(nullptr); 212 GV.clearMetadata(); 213 } 214 } 215 216 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 217 if (auto *F = dyn_cast<Function>(C)) 218 return Fn(F); 219 if (isa<GlobalValue>(C)) 220 return; 221 for (Value *Op : C->operands()) 222 forEachVirtualFunction(cast<Constant>(Op), Fn); 223 } 224 225 // If it's possible to split M into regular and thin LTO parts, do so and write 226 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 227 // regular LTO bitcode file to OS. 228 void splitAndWriteThinLTOBitcode( 229 raw_ostream &OS, raw_ostream *ThinLinkOS, 230 function_ref<AAResults &(Function &)> AARGetter, Module &M) { 231 std::string ModuleId = getUniqueModuleId(&M); 232 if (ModuleId.empty()) { 233 // We couldn't generate a module ID for this module, just write it out as a 234 // regular LTO module. 235 WriteBitcodeToFile(&M, OS); 236 if (ThinLinkOS) 237 // We don't have a ThinLTO part, but still write the module to the 238 // ThinLinkOS if requested so that the expected output file is produced. 239 WriteBitcodeToFile(&M, *ThinLinkOS); 240 return; 241 } 242 243 promoteTypeIds(M, ModuleId); 244 245 // Returns whether a global has attached type metadata. Such globals may 246 // participate in CFI or whole-program devirtualization, so they need to 247 // appear in the merged module instead of the thin LTO module. 248 auto HasTypeMetadata = [&](const GlobalObject *GO) { 249 SmallVector<MDNode *, 1> MDs; 250 GO->getMetadata(LLVMContext::MD_type, MDs); 251 return !MDs.empty(); 252 }; 253 254 // Collect the set of virtual functions that are eligible for virtual constant 255 // propagation. Each eligible function must not access memory, must return 256 // an integer of width <=64 bits, must take at least one argument, must not 257 // use its first argument (assumed to be "this") and all arguments other than 258 // the first one must be of <=64 bit integer type. 259 // 260 // Note that we test whether this copy of the function is readnone, rather 261 // than testing function attributes, which must hold for any copy of the 262 // function, even a less optimized version substituted at link time. This is 263 // sound because the virtual constant propagation optimizations effectively 264 // inline all implementations of the virtual function into each call site, 265 // rather than using function attributes to perform local optimization. 266 std::set<const Function *> EligibleVirtualFns; 267 // If any member of a comdat lives in MergedM, put all members of that 268 // comdat in MergedM to keep the comdat together. 269 DenseSet<const Comdat *> MergedMComdats; 270 for (GlobalVariable &GV : M.globals()) 271 if (HasTypeMetadata(&GV)) { 272 if (const auto *C = GV.getComdat()) 273 MergedMComdats.insert(C); 274 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 275 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 276 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 277 !F->arg_begin()->use_empty()) 278 return; 279 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 280 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 281 if (!ArgT || ArgT->getBitWidth() > 64) 282 return; 283 } 284 if (!F->isDeclaration() && 285 computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 286 EligibleVirtualFns.insert(F); 287 }); 288 } 289 290 ValueToValueMapTy VMap; 291 std::unique_ptr<Module> MergedM( 292 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 293 if (const auto *C = GV->getComdat()) 294 if (MergedMComdats.count(C)) 295 return true; 296 if (auto *F = dyn_cast<Function>(GV)) 297 return EligibleVirtualFns.count(F); 298 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 299 return HasTypeMetadata(GVar); 300 return false; 301 })); 302 StripDebugInfo(*MergedM); 303 304 for (Function &F : *MergedM) 305 if (!F.isDeclaration()) { 306 // Reset the linkage of all functions eligible for virtual constant 307 // propagation. The canonical definitions live in the thin LTO module so 308 // that they can be imported. 309 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 310 F.setComdat(nullptr); 311 } 312 313 SetVector<GlobalValue *> CfiFunctions; 314 for (auto &F : M) 315 if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) 316 CfiFunctions.insert(&F); 317 318 // Remove all globals with type metadata, globals with comdats that live in 319 // MergedM, and aliases pointing to such globals from the thin LTO module. 320 filterModule(&M, [&](const GlobalValue *GV) { 321 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 322 if (HasTypeMetadata(GVar)) 323 return false; 324 if (const auto *C = GV->getComdat()) 325 if (MergedMComdats.count(C)) 326 return false; 327 return true; 328 }); 329 330 promoteInternals(*MergedM, M, ModuleId, CfiFunctions); 331 promoteInternals(M, *MergedM, ModuleId, CfiFunctions); 332 333 SmallVector<MDNode *, 8> CfiFunctionMDs; 334 for (auto V : CfiFunctions) { 335 Function &F = *cast<Function>(V); 336 SmallVector<MDNode *, 2> Types; 337 F.getMetadata(LLVMContext::MD_type, Types); 338 339 auto &Ctx = MergedM->getContext(); 340 SmallVector<Metadata *, 4> Elts; 341 Elts.push_back(MDString::get(Ctx, F.getName())); 342 CfiFunctionLinkage Linkage; 343 if (!F.isDeclarationForLinker()) 344 Linkage = CFL_Definition; 345 else if (F.isWeakForLinker()) 346 Linkage = CFL_WeakDeclaration; 347 else 348 Linkage = CFL_Declaration; 349 Elts.push_back(ConstantAsMetadata::get( 350 llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); 351 for (auto Type : Types) 352 Elts.push_back(Type); 353 CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); 354 } 355 356 if(!CfiFunctionMDs.empty()) { 357 NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); 358 for (auto MD : CfiFunctionMDs) 359 NMD->addOperand(MD); 360 } 361 362 simplifyExternals(*MergedM); 363 364 // FIXME: Try to re-use BSI and PFI from the original module here. 365 ProfileSummaryInfo PSI(M); 366 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); 367 368 // Mark the merged module as requiring full LTO. We still want an index for 369 // it though, so that it can participate in summary-based dead stripping. 370 MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); 371 ModuleSummaryIndex MergedMIndex = 372 buildModuleSummaryIndex(*MergedM, nullptr, &PSI); 373 374 SmallVector<char, 0> Buffer; 375 376 BitcodeWriter W(Buffer); 377 // Save the module hash produced for the full bitcode, which will 378 // be used in the backends, and use that in the minimized bitcode 379 // produced for the full link. 380 ModuleHash ModHash = {{0}}; 381 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 382 /*GenerateHash=*/true, &ModHash); 383 W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, 384 &MergedMIndex); 385 W.writeSymtab(); 386 W.writeStrtab(); 387 OS << Buffer; 388 389 // If a minimized bitcode module was requested for the thin link, only 390 // the information that is needed by thin link will be written in the 391 // given OS (the merged module will be written as usual). 392 if (ThinLinkOS) { 393 Buffer.clear(); 394 BitcodeWriter W2(Buffer); 395 StripDebugInfo(M); 396 W2.writeThinLinkBitcode(&M, Index, ModHash); 397 W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, 398 &MergedMIndex); 399 W2.writeSymtab(); 400 W2.writeStrtab(); 401 *ThinLinkOS << Buffer; 402 } 403 } 404 405 // Returns whether this module needs to be split because it uses type metadata. 406 bool requiresSplit(Module &M) { 407 SmallVector<MDNode *, 1> MDs; 408 for (auto &GO : M.global_objects()) { 409 GO.getMetadata(LLVMContext::MD_type, MDs); 410 if (!MDs.empty()) 411 return true; 412 } 413 414 return false; 415 } 416 417 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, 418 function_ref<AAResults &(Function &)> AARGetter, 419 Module &M, const ModuleSummaryIndex *Index) { 420 // See if this module has any type metadata. If so, we need to split it. 421 if (requiresSplit(M)) 422 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); 423 424 // Otherwise we can just write it out as a regular module. 425 426 // Save the module hash produced for the full bitcode, which will 427 // be used in the backends, and use that in the minimized bitcode 428 // produced for the full link. 429 ModuleHash ModHash = {{0}}; 430 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 431 /*GenerateHash=*/true, &ModHash); 432 // If a minimized bitcode module was requested for the thin link, only 433 // the information that is needed by thin link will be written in the 434 // given OS. 435 if (ThinLinkOS && Index) 436 WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash); 437 } 438 439 class WriteThinLTOBitcode : public ModulePass { 440 raw_ostream &OS; // raw_ostream to print on 441 // The output stream on which to emit a minimized module for use 442 // just in the thin link, if requested. 443 raw_ostream *ThinLinkOS; 444 445 public: 446 static char ID; // Pass identification, replacement for typeid 447 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { 448 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 449 } 450 451 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) 452 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { 453 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 454 } 455 456 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 457 458 bool runOnModule(Module &M) override { 459 const ModuleSummaryIndex *Index = 460 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 461 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); 462 return true; 463 } 464 void getAnalysisUsage(AnalysisUsage &AU) const override { 465 AU.setPreservesAll(); 466 AU.addRequired<AssumptionCacheTracker>(); 467 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 468 AU.addRequired<TargetLibraryInfoWrapperPass>(); 469 } 470 }; 471 } // anonymous namespace 472 473 char WriteThinLTOBitcode::ID = 0; 474 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 475 "Write ThinLTO Bitcode", false, true) 476 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 477 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 478 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 479 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 480 "Write ThinLTO Bitcode", false, true) 481 482 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, 483 raw_ostream *ThinLinkOS) { 484 return new WriteThinLTOBitcode(Str, ThinLinkOS); 485 } 486 487 PreservedAnalyses 488 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { 489 FunctionAnalysisManager &FAM = 490 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 491 writeThinLTOBitcode(OS, ThinLinkOS, 492 [&FAM](Function &F) -> AAResults & { 493 return FAM.getResult<AAManager>(F); 494 }, 495 M, &AM.getResult<ModuleSummaryIndexAnalysis>(M)); 496 return PreservedAnalyses::all(); 497 } 498