1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" 11 #include "llvm/Analysis/BasicAliasAnalysis.h" 12 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 13 #include "llvm/Analysis/ProfileSummaryInfo.h" 14 #include "llvm/Analysis/TypeMetadataUtils.h" 15 #include "llvm/Bitcode/BitcodeWriter.h" 16 #include "llvm/IR/Constants.h" 17 #include "llvm/IR/DebugInfo.h" 18 #include "llvm/IR/Intrinsics.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/IR/PassManager.h" 21 #include "llvm/Pass.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/ScopedPrinter.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Transforms/IPO.h" 26 #include "llvm/Transforms/IPO/FunctionAttrs.h" 27 #include "llvm/Transforms/Utils/Cloning.h" 28 #include "llvm/Transforms/Utils/ModuleUtils.h" 29 using namespace llvm; 30 31 namespace { 32 33 // Promote each local-linkage entity defined by ExportM and used by ImportM by 34 // changing visibility and appending the given ModuleId. 35 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 36 DenseMap<const Comdat *, Comdat *> RenamedComdats; 37 for (auto &ExportGV : ExportM.global_values()) { 38 if (!ExportGV.hasLocalLinkage()) 39 continue; 40 41 auto Name = ExportGV.getName(); 42 GlobalValue *ImportGV = ImportM.getNamedValue(Name); 43 if (!ImportGV || ImportGV->use_empty()) 44 continue; 45 46 std::string NewName = (Name + ModuleId).str(); 47 48 if (const auto *C = ExportGV.getComdat()) 49 if (C->getName() == Name) 50 RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName)); 51 52 ExportGV.setName(NewName); 53 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 54 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 55 56 ImportGV->setName(NewName); 57 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 58 } 59 60 if (!RenamedComdats.empty()) 61 for (auto &GO : ExportM.global_objects()) 62 if (auto *C = GO.getComdat()) { 63 auto Replacement = RenamedComdats.find(C); 64 if (Replacement != RenamedComdats.end()) 65 GO.setComdat(Replacement->second); 66 } 67 } 68 69 // Promote all internal (i.e. distinct) type ids used by the module by replacing 70 // them with external type ids formed using the module id. 71 // 72 // Note that this needs to be done before we clone the module because each clone 73 // will receive its own set of distinct metadata nodes. 74 void promoteTypeIds(Module &M, StringRef ModuleId) { 75 DenseMap<Metadata *, Metadata *> LocalToGlobal; 76 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 77 Metadata *MD = 78 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 79 80 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 81 Metadata *&GlobalMD = LocalToGlobal[MD]; 82 if (!GlobalMD) { 83 std::string NewName = 84 (to_string(LocalToGlobal.size()) + ModuleId).str(); 85 GlobalMD = MDString::get(M.getContext(), NewName); 86 } 87 88 CI->setArgOperand(ArgNo, 89 MetadataAsValue::get(M.getContext(), GlobalMD)); 90 } 91 }; 92 93 if (Function *TypeTestFunc = 94 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 95 for (const Use &U : TypeTestFunc->uses()) { 96 auto CI = cast<CallInst>(U.getUser()); 97 ExternalizeTypeId(CI, 1); 98 } 99 } 100 101 if (Function *TypeCheckedLoadFunc = 102 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 103 for (const Use &U : TypeCheckedLoadFunc->uses()) { 104 auto CI = cast<CallInst>(U.getUser()); 105 ExternalizeTypeId(CI, 2); 106 } 107 } 108 109 for (GlobalObject &GO : M.global_objects()) { 110 SmallVector<MDNode *, 1> MDs; 111 GO.getMetadata(LLVMContext::MD_type, MDs); 112 113 GO.eraseMetadata(LLVMContext::MD_type); 114 for (auto MD : MDs) { 115 auto I = LocalToGlobal.find(MD->getOperand(1)); 116 if (I == LocalToGlobal.end()) { 117 GO.addMetadata(LLVMContext::MD_type, *MD); 118 continue; 119 } 120 GO.addMetadata( 121 LLVMContext::MD_type, 122 *MDNode::get(M.getContext(), 123 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 124 } 125 } 126 } 127 128 // Drop unused globals, and drop type information from function declarations. 129 // FIXME: If we made functions typeless then there would be no need to do this. 130 void simplifyExternals(Module &M) { 131 FunctionType *EmptyFT = 132 FunctionType::get(Type::getVoidTy(M.getContext()), false); 133 134 for (auto I = M.begin(), E = M.end(); I != E;) { 135 Function &F = *I++; 136 if (F.isDeclaration() && F.use_empty()) { 137 F.eraseFromParent(); 138 continue; 139 } 140 141 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 142 continue; 143 144 Function *NewF = 145 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 146 NewF->setVisibility(F.getVisibility()); 147 NewF->takeName(&F); 148 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 149 F.eraseFromParent(); 150 } 151 152 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 153 GlobalVariable &GV = *I++; 154 if (GV.isDeclaration() && GV.use_empty()) { 155 GV.eraseFromParent(); 156 continue; 157 } 158 } 159 } 160 161 void filterModule( 162 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 163 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 164 I != E;) { 165 GlobalAlias *GA = &*I++; 166 if (ShouldKeepDefinition(GA)) 167 continue; 168 169 GlobalObject *GO; 170 if (GA->getValueType()->isFunctionTy()) 171 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 172 GlobalValue::ExternalLinkage, "", M); 173 else 174 GO = new GlobalVariable( 175 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 176 nullptr, "", nullptr, 177 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 178 GO->takeName(GA); 179 GA->replaceAllUsesWith(GO); 180 GA->eraseFromParent(); 181 } 182 183 for (Function &F : *M) { 184 if (ShouldKeepDefinition(&F)) 185 continue; 186 187 F.deleteBody(); 188 F.setComdat(nullptr); 189 F.clearMetadata(); 190 } 191 192 for (GlobalVariable &GV : M->globals()) { 193 if (ShouldKeepDefinition(&GV)) 194 continue; 195 196 GV.setInitializer(nullptr); 197 GV.setLinkage(GlobalValue::ExternalLinkage); 198 GV.setComdat(nullptr); 199 GV.clearMetadata(); 200 } 201 } 202 203 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 204 if (auto *F = dyn_cast<Function>(C)) 205 return Fn(F); 206 if (isa<GlobalValue>(C)) 207 return; 208 for (Value *Op : C->operands()) 209 forEachVirtualFunction(cast<Constant>(Op), Fn); 210 } 211 212 // If it's possible to split M into regular and thin LTO parts, do so and write 213 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 214 // regular LTO bitcode file to OS. 215 void splitAndWriteThinLTOBitcode( 216 raw_ostream &OS, raw_ostream *ThinLinkOS, 217 function_ref<AAResults &(Function &)> AARGetter, Module &M) { 218 std::string ModuleId = getUniqueModuleId(&M); 219 if (ModuleId.empty()) { 220 // We couldn't generate a module ID for this module, just write it out as a 221 // regular LTO module. 222 WriteBitcodeToFile(&M, OS); 223 if (ThinLinkOS) 224 // We don't have a ThinLTO part, but still write the module to the 225 // ThinLinkOS if requested so that the expected output file is produced. 226 WriteBitcodeToFile(&M, *ThinLinkOS); 227 return; 228 } 229 230 promoteTypeIds(M, ModuleId); 231 232 // Returns whether a global has attached type metadata. Such globals may 233 // participate in CFI or whole-program devirtualization, so they need to 234 // appear in the merged module instead of the thin LTO module. 235 auto HasTypeMetadata = [&](const GlobalObject *GO) { 236 SmallVector<MDNode *, 1> MDs; 237 GO->getMetadata(LLVMContext::MD_type, MDs); 238 return !MDs.empty(); 239 }; 240 241 // Collect the set of virtual functions that are eligible for virtual constant 242 // propagation. Each eligible function must not access memory, must return 243 // an integer of width <=64 bits, must take at least one argument, must not 244 // use its first argument (assumed to be "this") and all arguments other than 245 // the first one must be of <=64 bit integer type. 246 // 247 // Note that we test whether this copy of the function is readnone, rather 248 // than testing function attributes, which must hold for any copy of the 249 // function, even a less optimized version substituted at link time. This is 250 // sound because the virtual constant propagation optimizations effectively 251 // inline all implementations of the virtual function into each call site, 252 // rather than using function attributes to perform local optimization. 253 std::set<const Function *> EligibleVirtualFns; 254 // If any member of a comdat lives in MergedM, put all members of that 255 // comdat in MergedM to keep the comdat together. 256 DenseSet<const Comdat *> MergedMComdats; 257 for (GlobalVariable &GV : M.globals()) 258 if (HasTypeMetadata(&GV)) { 259 if (const auto *C = GV.getComdat()) 260 MergedMComdats.insert(C); 261 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 262 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 263 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 264 !F->arg_begin()->use_empty()) 265 return; 266 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 267 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 268 if (!ArgT || ArgT->getBitWidth() > 64) 269 return; 270 } 271 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 272 EligibleVirtualFns.insert(F); 273 }); 274 } 275 276 ValueToValueMapTy VMap; 277 std::unique_ptr<Module> MergedM( 278 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 279 if (const auto *C = GV->getComdat()) 280 if (MergedMComdats.count(C)) 281 return true; 282 if (auto *F = dyn_cast<Function>(GV)) 283 return EligibleVirtualFns.count(F); 284 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 285 return HasTypeMetadata(GVar); 286 return false; 287 })); 288 StripDebugInfo(*MergedM); 289 290 for (Function &F : *MergedM) 291 if (!F.isDeclaration()) { 292 // Reset the linkage of all functions eligible for virtual constant 293 // propagation. The canonical definitions live in the thin LTO module so 294 // that they can be imported. 295 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 296 F.setComdat(nullptr); 297 } 298 299 // Remove all globals with type metadata, globals with comdats that live in 300 // MergedM, and aliases pointing to such globals from the thin LTO module. 301 filterModule(&M, [&](const GlobalValue *GV) { 302 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 303 if (HasTypeMetadata(GVar)) 304 return false; 305 if (const auto *C = GV->getComdat()) 306 if (MergedMComdats.count(C)) 307 return false; 308 return true; 309 }); 310 311 promoteInternals(*MergedM, M, ModuleId); 312 promoteInternals(M, *MergedM, ModuleId); 313 314 simplifyExternals(*MergedM); 315 316 317 // FIXME: Try to re-use BSI and PFI from the original module here. 318 ProfileSummaryInfo PSI(M); 319 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); 320 321 SmallVector<char, 0> Buffer; 322 323 BitcodeWriter W(Buffer); 324 // Save the module hash produced for the full bitcode, which will 325 // be used in the backends, and use that in the minimized bitcode 326 // produced for the full link. 327 ModuleHash ModHash = {{0}}; 328 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 329 /*GenerateHash=*/true, &ModHash); 330 W.writeModule(MergedM.get()); 331 W.writeStrtab(); 332 OS << Buffer; 333 334 // If a minimized bitcode module was requested for the thin link, 335 // strip the debug info (the merged module was already stripped above) 336 // and write it to the given OS. 337 if (ThinLinkOS) { 338 Buffer.clear(); 339 BitcodeWriter W2(Buffer); 340 StripDebugInfo(M); 341 W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 342 /*GenerateHash=*/false, &ModHash); 343 W2.writeModule(MergedM.get()); 344 W2.writeStrtab(); 345 *ThinLinkOS << Buffer; 346 } 347 } 348 349 // Returns whether this module needs to be split because it uses type metadata. 350 bool requiresSplit(Module &M) { 351 SmallVector<MDNode *, 1> MDs; 352 for (auto &GO : M.global_objects()) { 353 GO.getMetadata(LLVMContext::MD_type, MDs); 354 if (!MDs.empty()) 355 return true; 356 } 357 358 return false; 359 } 360 361 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, 362 function_ref<AAResults &(Function &)> AARGetter, 363 Module &M, const ModuleSummaryIndex *Index) { 364 // See if this module has any type metadata. If so, we need to split it. 365 if (requiresSplit(M)) 366 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); 367 368 // Otherwise we can just write it out as a regular module. 369 370 // Save the module hash produced for the full bitcode, which will 371 // be used in the backends, and use that in the minimized bitcode 372 // produced for the full link. 373 ModuleHash ModHash = {{0}}; 374 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 375 /*GenerateHash=*/true, &ModHash); 376 // If a minimized bitcode module was requested for the thin link, 377 // strip the debug info and write it to the given OS. 378 if (ThinLinkOS) { 379 StripDebugInfo(M); 380 WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, 381 Index, 382 /*GenerateHash=*/false, &ModHash); 383 } 384 } 385 386 class WriteThinLTOBitcode : public ModulePass { 387 raw_ostream &OS; // raw_ostream to print on 388 // The output stream on which to emit a minimized module for use 389 // just in the thin link, if requested. 390 raw_ostream *ThinLinkOS; 391 392 public: 393 static char ID; // Pass identification, replacement for typeid 394 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { 395 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 396 } 397 398 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) 399 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { 400 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 401 } 402 403 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 404 405 bool runOnModule(Module &M) override { 406 const ModuleSummaryIndex *Index = 407 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 408 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); 409 return true; 410 } 411 void getAnalysisUsage(AnalysisUsage &AU) const override { 412 AU.setPreservesAll(); 413 AU.addRequired<AssumptionCacheTracker>(); 414 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 415 AU.addRequired<TargetLibraryInfoWrapperPass>(); 416 } 417 }; 418 } // anonymous namespace 419 420 char WriteThinLTOBitcode::ID = 0; 421 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 422 "Write ThinLTO Bitcode", false, true) 423 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 424 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 425 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 426 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 427 "Write ThinLTO Bitcode", false, true) 428 429 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, 430 raw_ostream *ThinLinkOS) { 431 return new WriteThinLTOBitcode(Str, ThinLinkOS); 432 } 433 434 PreservedAnalyses 435 llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { 436 FunctionAnalysisManager &FAM = 437 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 438 writeThinLTOBitcode(OS, ThinLinkOS, 439 [&FAM](Function &F) -> AAResults & { 440 return FAM.getResult<AAManager>(F); 441 }, 442 M, &AM.getResult<ModuleSummaryIndexAnalysis>(M)); 443 return PreservedAnalyses::all(); 444 } 445