1 //===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass prepares a module containing type metadata for ThinLTO by splitting 11 // it into regular and thin LTO parts if possible, and writing both parts to 12 // a multi-module bitcode file. Modules that do not contain type metadata are 13 // written unmodified as a single module. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/IPO.h" 18 #include "llvm/Analysis/BasicAliasAnalysis.h" 19 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 20 #include "llvm/Analysis/TypeMetadataUtils.h" 21 #include "llvm/Bitcode/BitcodeWriter.h" 22 #include "llvm/IR/Constants.h" 23 #include "llvm/IR/DebugInfo.h" 24 #include "llvm/IR/Intrinsics.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/IR/PassManager.h" 27 #include "llvm/Pass.h" 28 #include "llvm/Support/ScopedPrinter.h" 29 #include "llvm/Transforms/IPO/FunctionAttrs.h" 30 #include "llvm/Transforms/Utils/Cloning.h" 31 using namespace llvm; 32 33 namespace { 34 35 // Produce a unique identifier for this module by taking the MD5 sum of the 36 // names of the module's strong external symbols. This identifier is 37 // normally guaranteed to be unique, or the program would fail to link due to 38 // multiply defined symbols. 39 // 40 // If the module has no strong external symbols (such a module may still have a 41 // semantic effect if it performs global initialization), we cannot produce a 42 // unique identifier for this module, so we return the empty string, which 43 // causes the entire module to be written as a regular LTO module. 44 std::string getModuleId(Module *M) { 45 MD5 Md5; 46 bool ExportsSymbols = false; 47 auto AddGlobal = [&](GlobalValue &GV) { 48 if (GV.isDeclaration() || GV.getName().startswith("llvm.") || 49 !GV.hasExternalLinkage()) 50 return; 51 ExportsSymbols = true; 52 Md5.update(GV.getName()); 53 Md5.update(ArrayRef<uint8_t>{0}); 54 }; 55 56 for (auto &F : *M) 57 AddGlobal(F); 58 for (auto &GV : M->globals()) 59 AddGlobal(GV); 60 for (auto &GA : M->aliases()) 61 AddGlobal(GA); 62 for (auto &IF : M->ifuncs()) 63 AddGlobal(IF); 64 65 if (!ExportsSymbols) 66 return ""; 67 68 MD5::MD5Result R; 69 Md5.final(R); 70 71 SmallString<32> Str; 72 MD5::stringifyResult(R, Str); 73 return ("$" + Str).str(); 74 } 75 76 // Promote each local-linkage entity defined by ExportM and used by ImportM by 77 // changing visibility and appending the given ModuleId. 78 void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { 79 auto PromoteInternal = [&](GlobalValue &ExportGV) { 80 if (!ExportGV.hasLocalLinkage()) 81 return; 82 83 GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName()); 84 if (!ImportGV || ImportGV->use_empty()) 85 return; 86 87 std::string NewName = (ExportGV.getName() + ModuleId).str(); 88 89 ExportGV.setName(NewName); 90 ExportGV.setLinkage(GlobalValue::ExternalLinkage); 91 ExportGV.setVisibility(GlobalValue::HiddenVisibility); 92 93 ImportGV->setName(NewName); 94 ImportGV->setVisibility(GlobalValue::HiddenVisibility); 95 }; 96 97 for (auto &F : ExportM) 98 PromoteInternal(F); 99 for (auto &GV : ExportM.globals()) 100 PromoteInternal(GV); 101 for (auto &GA : ExportM.aliases()) 102 PromoteInternal(GA); 103 for (auto &IF : ExportM.ifuncs()) 104 PromoteInternal(IF); 105 } 106 107 // Promote all internal (i.e. distinct) type ids used by the module by replacing 108 // them with external type ids formed using the module id. 109 // 110 // Note that this needs to be done before we clone the module because each clone 111 // will receive its own set of distinct metadata nodes. 112 void promoteTypeIds(Module &M, StringRef ModuleId) { 113 DenseMap<Metadata *, Metadata *> LocalToGlobal; 114 auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) { 115 Metadata *MD = 116 cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata(); 117 118 if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) { 119 Metadata *&GlobalMD = LocalToGlobal[MD]; 120 if (!GlobalMD) { 121 std::string NewName = 122 (to_string(LocalToGlobal.size()) + ModuleId).str(); 123 GlobalMD = MDString::get(M.getContext(), NewName); 124 } 125 126 CI->setArgOperand(ArgNo, 127 MetadataAsValue::get(M.getContext(), GlobalMD)); 128 } 129 }; 130 131 if (Function *TypeTestFunc = 132 M.getFunction(Intrinsic::getName(Intrinsic::type_test))) { 133 for (const Use &U : TypeTestFunc->uses()) { 134 auto CI = cast<CallInst>(U.getUser()); 135 ExternalizeTypeId(CI, 1); 136 } 137 } 138 139 if (Function *TypeCheckedLoadFunc = 140 M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) { 141 for (const Use &U : TypeCheckedLoadFunc->uses()) { 142 auto CI = cast<CallInst>(U.getUser()); 143 ExternalizeTypeId(CI, 2); 144 } 145 } 146 147 for (GlobalObject &GO : M.global_objects()) { 148 SmallVector<MDNode *, 1> MDs; 149 GO.getMetadata(LLVMContext::MD_type, MDs); 150 151 GO.eraseMetadata(LLVMContext::MD_type); 152 for (auto MD : MDs) { 153 auto I = LocalToGlobal.find(MD->getOperand(1)); 154 if (I == LocalToGlobal.end()) { 155 GO.addMetadata(LLVMContext::MD_type, *MD); 156 continue; 157 } 158 GO.addMetadata( 159 LLVMContext::MD_type, 160 *MDNode::get(M.getContext(), 161 ArrayRef<Metadata *>{MD->getOperand(0), I->second})); 162 } 163 } 164 } 165 166 // Drop unused globals, and drop type information from function declarations. 167 // FIXME: If we made functions typeless then there would be no need to do this. 168 void simplifyExternals(Module &M) { 169 FunctionType *EmptyFT = 170 FunctionType::get(Type::getVoidTy(M.getContext()), false); 171 172 for (auto I = M.begin(), E = M.end(); I != E;) { 173 Function &F = *I++; 174 if (F.isDeclaration() && F.use_empty()) { 175 F.eraseFromParent(); 176 continue; 177 } 178 179 if (!F.isDeclaration() || F.getFunctionType() == EmptyFT) 180 continue; 181 182 Function *NewF = 183 Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M); 184 NewF->setVisibility(F.getVisibility()); 185 NewF->takeName(&F); 186 F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType())); 187 F.eraseFromParent(); 188 } 189 190 for (auto I = M.global_begin(), E = M.global_end(); I != E;) { 191 GlobalVariable &GV = *I++; 192 if (GV.isDeclaration() && GV.use_empty()) { 193 GV.eraseFromParent(); 194 continue; 195 } 196 } 197 } 198 199 void filterModule( 200 Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) { 201 for (Function &F : *M) { 202 if (ShouldKeepDefinition(&F)) 203 continue; 204 205 F.deleteBody(); 206 F.setComdat(nullptr); 207 F.clearMetadata(); 208 } 209 210 for (GlobalVariable &GV : M->globals()) { 211 if (ShouldKeepDefinition(&GV)) 212 continue; 213 214 GV.setInitializer(nullptr); 215 GV.setLinkage(GlobalValue::ExternalLinkage); 216 GV.setComdat(nullptr); 217 GV.clearMetadata(); 218 } 219 220 for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); 221 I != E;) { 222 GlobalAlias *GA = &*I++; 223 if (ShouldKeepDefinition(GA)) 224 continue; 225 226 GlobalObject *GO; 227 if (I->getValueType()->isFunctionTy()) 228 GO = Function::Create(cast<FunctionType>(GA->getValueType()), 229 GlobalValue::ExternalLinkage, "", M); 230 else 231 GO = new GlobalVariable( 232 *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, 233 (Constant *)nullptr, "", (GlobalVariable *)nullptr, 234 GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); 235 GO->takeName(GA); 236 GA->replaceAllUsesWith(GO); 237 GA->eraseFromParent(); 238 } 239 } 240 241 void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) { 242 if (auto *F = dyn_cast<Function>(C)) 243 return Fn(F); 244 if (isa<GlobalValue>(C)) 245 return; 246 for (Value *Op : C->operands()) 247 forEachVirtualFunction(cast<Constant>(Op), Fn); 248 } 249 250 // If it's possible to split M into regular and thin LTO parts, do so and write 251 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a 252 // regular LTO bitcode file to OS. 253 void splitAndWriteThinLTOBitcode( 254 raw_ostream &OS, function_ref<AAResults &(Function &)> AARGetter, 255 Module &M) { 256 std::string ModuleId = getModuleId(&M); 257 if (ModuleId.empty()) { 258 // We couldn't generate a module ID for this module, just write it out as a 259 // regular LTO module. 260 WriteBitcodeToFile(&M, OS); 261 return; 262 } 263 264 promoteTypeIds(M, ModuleId); 265 266 // Returns whether a global has attached type metadata. Such globals may 267 // participate in CFI or whole-program devirtualization, so they need to 268 // appear in the merged module instead of the thin LTO module. 269 auto HasTypeMetadata = [&](const GlobalObject *GO) { 270 SmallVector<MDNode *, 1> MDs; 271 GO->getMetadata(LLVMContext::MD_type, MDs); 272 return !MDs.empty(); 273 }; 274 275 // Collect the set of virtual functions that are eligible for virtual constant 276 // propagation. Each eligible function must not access memory, must return 277 // an integer of width <=64 bits, must take at least one argument, must not 278 // use its first argument (assumed to be "this") and all arguments other than 279 // the first one must be of <=64 bit integer type. 280 // 281 // Note that we test whether this copy of the function is readnone, rather 282 // than testing function attributes, which must hold for any copy of the 283 // function, even a less optimized version substituted at link time. This is 284 // sound because the virtual constant propagation optimizations effectively 285 // inline all implementations of the virtual function into each call site, 286 // rather than using function attributes to perform local optimization. 287 std::set<const Function *> EligibleVirtualFns; 288 for (GlobalVariable &GV : M.globals()) 289 if (HasTypeMetadata(&GV)) 290 forEachVirtualFunction(GV.getInitializer(), [&](Function *F) { 291 auto *RT = dyn_cast<IntegerType>(F->getReturnType()); 292 if (!RT || RT->getBitWidth() > 64 || F->arg_empty() || 293 !F->arg_begin()->use_empty()) 294 return; 295 for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) { 296 auto *ArgT = dyn_cast<IntegerType>(Arg.getType()); 297 if (!ArgT || ArgT->getBitWidth() > 64) 298 return; 299 } 300 if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) 301 EligibleVirtualFns.insert(F); 302 }); 303 304 ValueToValueMapTy VMap; 305 std::unique_ptr<Module> MergedM( 306 CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool { 307 if (auto *F = dyn_cast<Function>(GV)) 308 return EligibleVirtualFns.count(F); 309 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 310 return HasTypeMetadata(GVar); 311 return false; 312 })); 313 StripDebugInfo(*MergedM); 314 315 for (Function &F : *MergedM) 316 if (!F.isDeclaration()) { 317 // Reset the linkage of all functions eligible for virtual constant 318 // propagation. The canonical definitions live in the thin LTO module so 319 // that they can be imported. 320 F.setLinkage(GlobalValue::AvailableExternallyLinkage); 321 F.setComdat(nullptr); 322 } 323 324 // Remove all globals with type metadata, as well as aliases pointing to them, 325 // from the thin LTO module. 326 filterModule(&M, [&](const GlobalValue *GV) { 327 if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject())) 328 return !HasTypeMetadata(GVar); 329 return true; 330 }); 331 332 promoteInternals(*MergedM, M, ModuleId); 333 promoteInternals(M, *MergedM, ModuleId); 334 335 simplifyExternals(*MergedM); 336 337 SmallVector<char, 0> Buffer; 338 BitcodeWriter W(Buffer); 339 340 // FIXME: Try to re-use BSI and PFI from the original module here. 341 ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); 342 W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, 343 /*GenerateHash=*/true); 344 345 W.writeModule(MergedM.get()); 346 347 OS << Buffer; 348 } 349 350 // Returns whether this module needs to be split because it uses type metadata. 351 bool requiresSplit(Module &M) { 352 SmallVector<MDNode *, 1> MDs; 353 for (auto &GO : M.global_objects()) { 354 GO.getMetadata(LLVMContext::MD_type, MDs); 355 if (!MDs.empty()) 356 return true; 357 } 358 359 return false; 360 } 361 362 void writeThinLTOBitcode(raw_ostream &OS, 363 function_ref<AAResults &(Function &)> AARGetter, 364 Module &M, const ModuleSummaryIndex *Index) { 365 // See if this module has any type metadata. If so, we need to split it. 366 if (requiresSplit(M)) 367 return splitAndWriteThinLTOBitcode(OS, AARGetter, M); 368 369 // Otherwise we can just write it out as a regular module. 370 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, 371 /*GenerateHash=*/true); 372 } 373 374 class WriteThinLTOBitcode : public ModulePass { 375 raw_ostream &OS; // raw_ostream to print on 376 377 public: 378 static char ID; // Pass identification, replacement for typeid 379 WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) { 380 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 381 } 382 383 explicit WriteThinLTOBitcode(raw_ostream &o) 384 : ModulePass(ID), OS(o) { 385 initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry()); 386 } 387 388 StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; } 389 390 bool runOnModule(Module &M) override { 391 const ModuleSummaryIndex *Index = 392 &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex()); 393 writeThinLTOBitcode(OS, LegacyAARGetter(*this), M, Index); 394 return true; 395 } 396 void getAnalysisUsage(AnalysisUsage &AU) const override { 397 AU.setPreservesAll(); 398 AU.addRequired<AssumptionCacheTracker>(); 399 AU.addRequired<ModuleSummaryIndexWrapperPass>(); 400 AU.addRequired<TargetLibraryInfoWrapperPass>(); 401 } 402 }; 403 } // anonymous namespace 404 405 char WriteThinLTOBitcode::ID = 0; 406 INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode", 407 "Write ThinLTO Bitcode", false, true) 408 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 409 INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass) 410 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 411 INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode", 412 "Write ThinLTO Bitcode", false, true) 413 414 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) { 415 return new WriteThinLTOBitcode(Str); 416 } 417