1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Analysis/BasicAliasAnalysis.h" 18 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 19 #include "llvm/Analysis/TypeMetadataUtils.h" 20 #include "llvm/Bitcode/BitcodeWriter.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/DebugInfo.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include "llvm/IR/Module.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/Pass.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Transforms/IPO.h" 31 #include "llvm/Transforms/IPO/FunctionAttrs.h" 32 #include "llvm/Transforms/Utils/Cloning.h" 33 #include "llvm/Transforms/Utils/ModuleUtils.h" 34 using namespace llvm; 35 36 namespace { 37 38 // Promote each local-linkage entity defined by ExportM and used by ImportM by 39 // changing visibility and appending the given ModuleId. 40 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 41 DenseMap<const Comdat *, Comdat *> RenamedComdats; 42 for (auto &ExportGV : ExportM.global_values()) { 43 if (!ExportGV.hasLocalLinkage()) 44 continue; 45 46 auto Name = ExportGV.getName(); 47 GlobalValue *ImportGV = ImportM.getNamedValue(Name); 48 if (!ImportGV || ImportGV->use_empty()) 49 continue; 50 51 std::string NewName = (Name + ModuleId).str(); 52 53 if (const auto *C = ExportGV.getComdat()) 54 if (C->getName() == Name) 55 RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName)); 56 57 ExportGV.setName(NewName); 58 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 59 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 60 61 ImportGV->setName(NewName); 62 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 63 } 64 65 if (!RenamedComdats.empty()) 66 for (auto &GO : ExportM.global_objects()) 67 if (auto *C = GO.getComdat()) { 68 auto Replacement = RenamedComdats.find(C); 69 if (Replacement != RenamedComdats.end()) 70 GO.setComdat(Replacement->second); 71 } 72 } 73 74 // Promote all internal (i.e. distinct) type ids used by the module by replacing 75 // them with external type ids formed using the module id. 76 // 77 // Note that this needs to be done before we clone the module because each clone 78 // will receive its own set of distinct metadata nodes. 79 void promoteTypeIds(Module &M, StringRef ModuleId) { 80 DenseMap<Metadata *, Metadata *> LocalToGlobal; 81 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 82 Metadata *MD = 83 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 84 85 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 86 Metadata *&GlobalMD = LocalToGlobal[MD]; 87 if (!GlobalMD) { 88 std::string NewName = 89 (to_string(LocalToGlobal.size()) + ModuleId).str(); 90 GlobalMD = MDString::get(M.getContext(), NewName); 91 } 92 93 CI->setArgOperand(ArgNo, 94 MetadataAsValue::get(M.getContext(), GlobalMD)); 95 } 96 }; 97 98 if (Function *TypeTestFunc = 99 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 100 for (const Use &U : TypeTestFunc->uses()) { 101 auto CI = cast<CallInst>(U.getUser()); 102 ExternalizeTypeId(CI, 1); 103 } 104 } 105 106 if (Function *TypeCheckedLoadFunc = 107 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 108 for (const Use &U : TypeCheckedLoadFunc->uses()) { 109 auto CI = cast<CallInst>(U.getUser()); 110 ExternalizeTypeId(CI, 2); 111 } 112 } 113 114 for (GlobalObject &GO : M.global_objects()) { 115 SmallVector<MDNode *, 1> MDs; 116 GO.getMetadata(LLVMContext::MD_type, MDs); 117 118 GO.eraseMetadata(LLVMContext::MD_type); 119 for (auto MD : MDs) { 120 auto I = LocalToGlobal.find(MD->getOperand(1)); 121 if (I == LocalToGlobal.end()) { 122 GO.addMetadata(LLVMContext::MD_type, *MD); 123 continue; 124 } 125 GO.addMetadata( 126 LLVMContext::MD_type, 127 *MDNode::get(M.getContext(), 128 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 129 } 130 } 131 } 132 133 // Drop unused globals, and drop type information from function declarations. 134 // FIXME: If we made functions typeless then there would be no need to do this. 135 void simplifyExternals(Module &M) { 136 FunctionType *EmptyFT = 137 FunctionType::get(Type::getVoidTy(M.getContext()), false); 138 139 for (auto I = M.begin(), E = M.end(); I != E;) { 140 Function &F = *I++; 141 if (F.isDeclaration() && F.use_empty()) { 142 F.eraseFromParent(); 143 continue; 144 } 145 146 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 147 continue; 148 149 Function *NewF = 150 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 151 NewF->setVisibility(F.getVisibility()); 152 NewF->takeName(&F); 153 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 154 F.eraseFromParent(); 155 } 156 157 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 158 GlobalVariable &GV = *I++; 159 if (GV.isDeclaration() && GV.use_empty()) { 160 GV.eraseFromParent(); 161 continue; 162 } 163 } 164 } 165 166 void filterModule( 167 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 168 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 169 I != E;) { 170 GlobalAlias *GA = &*I++; 171 if (ShouldKeepDefinition(GA)) 172 continue; 173 174 GlobalObject *GO; 175 if (GA->getValueType()->isFunctionTy()) 176 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 177 GlobalValue::ExternalLinkage, "", M); 178 else 179 GO = new GlobalVariable( 180 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 181 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 182 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 183 GO->takeName(GA); 184 GA->replaceAllUsesWith(GO); 185 GA->eraseFromParent(); 186 } 187 188 for (Function &F : *M) { 189 if (ShouldKeepDefinition(&F)) 190 continue; 191 192 F.deleteBody(); 193 F.setComdat(nullptr); 194 F.clearMetadata(); 195 } 196 197 for (GlobalVariable &GV : M->globals()) { 198 if (ShouldKeepDefinition(&GV)) 199 continue; 200 201 GV.setInitializer(nullptr); 202 GV.setLinkage(GlobalValue::ExternalLinkage); 203 GV.setComdat(nullptr); 204 GV.clearMetadata(); 205 } 206 } 207 208 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 209 if (auto *F = dyn_cast<Function>(C)) 210 return Fn(F); 211 if (isa<GlobalValue>(C)) 212 return; 213 for (Value *Op : C->operands()) 214 forEachVirtualFunction(cast<Constant>(Op), Fn); 215 } 216 217 // If it's possible to split M into regular and thin LTO parts, do so and write 218 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 219 // regular LTO bitcode file to OS. 220 void splitAndWriteThinLTOBitcode( 221 raw_ostream &OS, raw_ostream *ThinLinkOS, 222 function_ref<AAResults &(Function &)> AARGetter, Module &M) { 223 std::string ModuleId = getUniqueModuleId(&M); 224 if (ModuleId.empty()) { 225 // We couldn't generate a module ID for this module, just write it out as a 226 // regular LTO module. 227 WriteBitcodeToFile(&M, OS); 228 if (ThinLinkOS) 229 // We don't have a ThinLTO part, but still write the module to the 230 // ThinLinkOS if requested so that the expected output file is produced. 231 WriteBitcodeToFile(&M, *ThinLinkOS); 232 return; 233 } 234 235 promoteTypeIds(M, ModuleId); 236 237 // Returns whether a global has attached type metadata. Such globals may 238 // participate in CFI or whole-program devirtualization, so they need to 239 // appear in the merged module instead of the thin LTO module. 240 auto HasTypeMetadata = [&](const GlobalObject *GO) { 241 SmallVector<MDNode *, 1> MDs; 242 GO->getMetadata(LLVMContext::MD_type, MDs); 243 return !MDs.empty(); 244 }; 245 246 // Collect the set of virtual functions that are eligible for virtual constant 247 // propagation. Each eligible function must not access memory, must return 248 // an integer of width <=64 bits, must take at least one argument, must not 249 // use its first argument (assumed to be "this") and all arguments other than 250 // the first one must be of <=64 bit integer type. 251 // 252 // Note that we test whether this copy of the function is readnone, rather 253 // than testing function attributes, which must hold for any copy of the 254 // function, even a less optimized version substituted at link time. This is 255 // sound because the virtual constant propagation optimizations effectively 256 // inline all implementations of the virtual function into each call site, 257 // rather than using function attributes to perform local optimization. 258 std::set<const Function *> EligibleVirtualFns; 259 // If any member of a comdat lives in MergedM, put all members of that 260 // comdat in MergedM to keep the comdat together. 261 DenseSet<const Comdat *> MergedMComdats; 262 for (GlobalVariable &GV : M.globals()) 263 if (HasTypeMetadata(&GV)) { 264 if (const auto *C = GV.getComdat()) 265 MergedMComdats.insert(C); 266 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 267 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 268 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 269 !F->arg_begin()->use_empty()) 270 return; 271 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 272 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 273 if (!ArgT || ArgT->getBitWidth() > 64) 274 return; 275 } 276 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 277 EligibleVirtualFns.insert(F); 278 }); 279 } 280 281 ValueToValueMapTy VMap; 282 std::unique_ptr<Module> MergedM( 283 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 284 if (const auto *C = GV->getComdat()) 285 if (MergedMComdats.count(C)) 286 return true; 287 if (auto *F = dyn_cast<Function>(GV)) 288 return EligibleVirtualFns.count(F); 289 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 290 return HasTypeMetadata(GVar); 291 return false; 292 })); 293 StripDebugInfo(*MergedM); 294 295 for (Function &F : *MergedM) 296 if (!F.isDeclaration()) { 297 // Reset the linkage of all functions eligible for virtual constant 298 // propagation. The canonical definitions live in the thin LTO module so 299 // that they can be imported. 300 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 301 F.setComdat(nullptr); 302 } 303 304 // Remove all globals with type metadata, globals with comdats that live in 305 // MergedM, and aliases pointing to such globals from the thin LTO module. 306 filterModule(&M, [&](const GlobalValue *GV) { 307 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 308 if (HasTypeMetadata(GVar)) 309 return false; 310 if (const auto *C = GV->getComdat()) 311 if (MergedMComdats.count(C)) 312 return false; 313 return true; 314 }); 315 316 promoteInternals(*MergedM, M, ModuleId); 317 promoteInternals(M, *MergedM, ModuleId); 318 319 simplifyExternals(*MergedM); 320 321 322 // FIXME: Try to re-use BSI and PFI from the original module here. 323 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 324 325 SmallVector<char, 0> Buffer; 326 327 BitcodeWriter W(Buffer); 328 // Save the module hash produced for the full bitcode, which will 329 // be used in the backends, and use that in the minimized bitcode 330 // produced for the full link. 331 ModuleHash ModHash = {{0}}; 332 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 333 /*GenerateHash=*/true, &ModHash); 334 W.writeModule(MergedM.get()); 335 W.writeStrtab(); 336 OS << Buffer; 337 338 // If a minimized bitcode module was requested for the thin link, 339 // strip the debug info (the merged module was already stripped above) 340 // and write it to the given OS. 341 if (ThinLinkOS) { 342 Buffer.clear(); 343 BitcodeWriter W2(Buffer); 344 StripDebugInfo(M); 345 W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 346 /*GenerateHash=*/false, &ModHash); 347 W2.writeModule(MergedM.get()); 348 W2.writeStrtab(); 349 *ThinLinkOS << Buffer; 350 } 351 } 352 353 // Returns whether this module needs to be split because it uses type metadata. 354 bool requiresSplit(Module &M) { 355 SmallVector<MDNode *, 1> MDs; 356 for (auto &GO : M.global_objects()) { 357 GO.getMetadata(LLVMContext::MD_type, MDs); 358 if (!MDs.empty()) 359 return true; 360 } 361 362 return false; 363 } 364 365 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, 366 function_ref<AAResults &(Function &)> AARGetter, 367 Module &M, const ModuleSummaryIndex *Index) { 368 // See if this module has any type metadata. If so, we need to split it. 369 if (requiresSplit(M)) 370 return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); 371 372 // Otherwise we can just write it out as a regular module. 373 374 // Save the module hash produced for the full bitcode, which will 375 // be used in the backends, and use that in the minimized bitcode 376 // produced for the full link. 377 ModuleHash ModHash = {{0}}; 378 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 379 /*GenerateHash=*/true, &ModHash); 380 // If a minimized bitcode module was requested for the thin link, 381 // strip the debug info and write it to the given OS. 382 if (ThinLinkOS) { 383 StripDebugInfo(M); 384 WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, 385 Index, 386 /*GenerateHash=*/false, &ModHash); 387 } 388 } 389 390 class WriteThinLTOBitcode : public ModulePass { 391 raw_ostream &OS; // raw_ostream to print on 392 // The output stream on which to emit a minimized module for use 393 // just in the thin link, if requested. 394 raw_ostream *ThinLinkOS; 395 396 public: 397 static char ID; // Pass identification, replacement for typeid 398 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) { 399 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 400 } 401 402 explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS) 403 : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) { 404 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 405 } 406 407 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 408 409 bool runOnModule(Module &M) override { 410 const ModuleSummaryIndex *Index = 411 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 412 writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index); 413 return true; 414 } 415 void getAnalysisUsage(AnalysisUsage &AU) const override { 416 AU.setPreservesAll(); 417 AU.addRequired<AssumptionCacheTracker>(); 418 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 419 AU.addRequired<TargetLibraryInfoWrapperPass>(); 420 } 421 }; 422 } // anonymous namespace 423 424 char WriteThinLTOBitcode::ID = 0; 425 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 426 "Write ThinLTO Bitcode", false, true) 427 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 428 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 429 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 430 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 431 "Write ThinLTO Bitcode", false, true) 432 433 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, 434 raw_ostream *ThinLinkOS) { 435 return new WriteThinLTOBitcode(Str, ThinLinkOS); 436 } 437