1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the "backend" phase of LTO, i.e. it performs 11 // optimization and code generation on a loaded module. It is generally used 12 // internally by the LTO class but can also be used independently, for example 13 // to implement a standalone ThinLTO backend. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/LTO/LTOBackend.h" 18 #include "llvm/Analysis/AliasAnalysis.h" 19 #include "llvm/Analysis/CGSCCPassManager.h" 20 #include "llvm/Analysis/LoopPassManager.h" 21 #include "llvm/Analysis/TargetLibraryInfo.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/Bitcode/ReaderWriter.h" 24 #include "llvm/IR/LegacyPassManager.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/IR/Verifier.h" 27 #include "llvm/LTO/LTO.h" 28 #include "llvm/LTO/legacy/UpdateCompilerUsed.h" 29 #include "llvm/MC/SubtargetFeature.h" 30 #include "llvm/Passes/PassBuilder.h" 31 #include "llvm/Support/Error.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/ThreadPool.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Transforms/IPO.h" 37 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 38 #include "llvm/Transforms/Utils/FunctionImportUtils.h" 39 #include "llvm/Transforms/Utils/SplitModule.h" 40 41 using namespace llvm; 42 using namespace lto; 43 44 LLVM_ATTRIBUTE_NORETURN void reportOpenError(StringRef Path, Twine Msg) { 45 errs() << "failed to open " << Path << ": " << Msg << '\n'; 46 errs().flush(); 47 exit(1); 48 } 49 50 Error Config::addSaveTemps(std::string OutputFileName, 51 bool UseInputModulePath) { 52 ShouldDiscardValueNames = false; 53 54 std::error_code EC; 55 ResolutionFile = llvm::make_unique<raw_fd_ostream>( 56 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text); 57 if (EC) 58 return errorCodeToError(EC); 59 60 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { 61 // Keep track of the hook provided by the linker, which also needs to run. 62 ModuleHookFn LinkerHook = Hook; 63 Hook = [=](unsigned Task, const Module &M) { 64 // If the linker's hook returned false, we need to pass that result 65 // through. 66 if (LinkerHook && !LinkerHook(Task, M)) 67 return false; 68 69 std::string PathPrefix; 70 // If this is the combined module (not a ThinLTO backend compile) or the 71 // user hasn't requested using the input module's path, emit to a file 72 // named from the provided OutputFileName with the Task ID appended. 73 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { 74 PathPrefix = OutputFileName + utostr(Task); 75 } else 76 PathPrefix = M.getModuleIdentifier(); 77 std::string Path = PathPrefix + "." + PathSuffix + ".bc"; 78 std::error_code EC; 79 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); 80 // Because -save-temps is a debugging feature, we report the error 81 // directly and exit. 82 if (EC) 83 reportOpenError(Path, EC.message()); 84 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false); 85 return true; 86 }; 87 }; 88 89 setHook("0.preopt", PreOptModuleHook); 90 setHook("1.promote", PostPromoteModuleHook); 91 setHook("2.internalize", PostInternalizeModuleHook); 92 setHook("3.import", PostImportModuleHook); 93 setHook("4.opt", PostOptModuleHook); 94 setHook("5.precodegen", PreCodeGenModuleHook); 95 96 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { 97 std::string Path = OutputFileName + "index.bc"; 98 std::error_code EC; 99 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); 100 // Because -save-temps is a debugging feature, we report the error 101 // directly and exit. 102 if (EC) 103 reportOpenError(Path, EC.message()); 104 WriteIndexToFile(Index, OS); 105 return true; 106 }; 107 108 return Error(); 109 } 110 111 namespace { 112 113 std::unique_ptr<TargetMachine> 114 createTargetMachine(Config &Conf, StringRef TheTriple, 115 const Target *TheTarget) { 116 SubtargetFeatures Features; 117 Features.getDefaultSubtargetFeatures(Triple(TheTriple)); 118 for (const std::string &A : Conf.MAttrs) 119 Features.AddFeature(A); 120 121 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( 122 TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel, 123 Conf.CodeModel, Conf.CGOptLevel)); 124 } 125 126 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, 127 std::string PipelineDesc, 128 std::string AAPipelineDesc, 129 bool DisableVerify) { 130 PassBuilder PB(TM); 131 AAManager AA; 132 133 // Parse a custom AA pipeline if asked to. 134 if (!AAPipelineDesc.empty()) 135 if (!PB.parseAAPipeline(AA, AAPipelineDesc)) 136 report_fatal_error("unable to parse AA pipeline description: " + 137 AAPipelineDesc); 138 139 LoopAnalysisManager LAM; 140 FunctionAnalysisManager FAM; 141 CGSCCAnalysisManager CGAM; 142 ModuleAnalysisManager MAM; 143 144 // Register the AA manager first so that our version is the one used. 145 FAM.registerPass([&] { return std::move(AA); }); 146 147 // Register all the basic analyses with the managers. 148 PB.registerModuleAnalyses(MAM); 149 PB.registerCGSCCAnalyses(CGAM); 150 PB.registerFunctionAnalyses(FAM); 151 PB.registerLoopAnalyses(LAM); 152 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); 153 154 ModulePassManager MPM; 155 156 // Always verify the input. 157 MPM.addPass(VerifierPass()); 158 159 // Now, add all the passes we've been requested to. 160 if (!PB.parsePassPipeline(MPM, PipelineDesc)) 161 report_fatal_error("unable to parse pass pipeline description: " + 162 PipelineDesc); 163 164 if (!DisableVerify) 165 MPM.addPass(VerifierPass()); 166 MPM.run(Mod, MAM); 167 } 168 169 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, 170 bool IsThinLto) { 171 legacy::PassManager passes; 172 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); 173 174 PassManagerBuilder PMB; 175 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); 176 PMB.Inliner = createFunctionInliningPass(); 177 // Unconditionally verify input since it is not verified before this 178 // point and has unknown origin. 179 PMB.VerifyInput = true; 180 PMB.VerifyOutput = !Conf.DisableVerify; 181 PMB.LoopVectorize = true; 182 PMB.SLPVectorize = true; 183 PMB.OptLevel = Conf.OptLevel; 184 if (IsThinLto) 185 PMB.populateThinLTOPassManager(passes); 186 else 187 PMB.populateLTOPassManager(passes); 188 passes.run(Mod); 189 } 190 191 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, 192 bool IsThinLto) { 193 Mod.setDataLayout(TM->createDataLayout()); 194 if (Conf.OptPipeline.empty()) 195 runOldPMPasses(Conf, Mod, TM, IsThinLto); 196 else 197 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, 198 Conf.DisableVerify); 199 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); 200 } 201 202 /// Monolithic LTO does not support caching (yet), this is a convenient wrapper 203 /// around AddOutput to workaround this. 204 static AddOutputFn getUncachedOutputWrapper(AddOutputFn &AddOutput, 205 unsigned Task) { 206 return [Task, &AddOutput](unsigned TaskId) { 207 auto Output = AddOutput(Task); 208 if (Output->isCachingEnabled() && Output->tryLoadFromCache("")) 209 report_fatal_error("Cache hit without a valid key?"); 210 assert(Task == TaskId && "Unexpexted TaskId mismatch"); 211 return Output; 212 }; 213 } 214 215 void codegen(Config &Conf, TargetMachine *TM, AddOutputFn AddOutput, 216 unsigned Task, Module &Mod) { 217 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) 218 return; 219 220 auto Output = AddOutput(Task); 221 std::unique_ptr<raw_pwrite_stream> OS = Output->getStream(); 222 legacy::PassManager CodeGenPasses; 223 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, 224 TargetMachine::CGFT_ObjectFile)) 225 report_fatal_error("Failed to setup codegen"); 226 CodeGenPasses.run(Mod); 227 } 228 229 void splitCodeGen(Config &C, TargetMachine *TM, AddOutputFn AddOutput, 230 unsigned ParallelCodeGenParallelismLevel, 231 std::unique_ptr<Module> Mod) { 232 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel); 233 unsigned ThreadCount = 0; 234 const Target *T = &TM->getTarget(); 235 236 SplitModule( 237 std::move(Mod), ParallelCodeGenParallelismLevel, 238 [&](std::unique_ptr<Module> MPart) { 239 // We want to clone the module in a new context to multi-thread the 240 // codegen. We do it by serializing partition modules to bitcode 241 // (while still on the main thread, in order to avoid data races) and 242 // spinning up new threads which deserialize the partitions into 243 // separate contexts. 244 // FIXME: Provide a more direct way to do this in LLVM. 245 SmallString<0> BC; 246 raw_svector_ostream BCOS(BC); 247 WriteBitcodeToFile(MPart.get(), BCOS); 248 249 // Enqueue the task 250 CodegenThreadPool.async( 251 [&](const SmallString<0> &BC, unsigned ThreadId) { 252 LTOLLVMContext Ctx(C); 253 ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( 254 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"), 255 Ctx); 256 if (!MOrErr) 257 report_fatal_error("Failed to read bitcode"); 258 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); 259 260 std::unique_ptr<TargetMachine> TM = 261 createTargetMachine(C, MPartInCtx->getTargetTriple(), T); 262 263 codegen(C, TM.get(), 264 getUncachedOutputWrapper(AddOutput, ThreadId), ThreadId, 265 *MPartInCtx); 266 }, 267 // Pass BC using std::move to ensure that it get moved rather than 268 // copied into the thread's context. 269 std::move(BC), ThreadCount++); 270 }, 271 false); 272 } 273 274 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) { 275 if (!C.OverrideTriple.empty()) 276 Mod.setTargetTriple(C.OverrideTriple); 277 else if (Mod.getTargetTriple().empty()) 278 Mod.setTargetTriple(C.DefaultTriple); 279 280 std::string Msg; 281 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg); 282 if (!T) 283 return make_error<StringError>(Msg, inconvertibleErrorCode()); 284 return T; 285 } 286 287 } 288 289 static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) { 290 // Collect the list of undefined symbols used in asm and update 291 // llvm.compiler.used to prevent optimization to drop these from the output. 292 StringSet<> AsmUndefinedRefs; 293 object::IRObjectFile::CollectAsmUndefinedRefs( 294 Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(), 295 [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { 296 if (Flags & object::BasicSymbolRef::SF_Undefined) 297 AsmUndefinedRefs.insert(Name); 298 }); 299 updateCompilerUsed(Mod, TM, AsmUndefinedRefs); 300 } 301 302 Error lto::backend(Config &C, AddOutputFn AddOutput, 303 unsigned ParallelCodeGenParallelismLevel, 304 std::unique_ptr<Module> Mod) { 305 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod); 306 if (!TOrErr) 307 return TOrErr.takeError(); 308 309 std::unique_ptr<TargetMachine> TM = 310 createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); 311 312 handleAsmUndefinedRefs(*Mod, *TM); 313 314 if (!C.CodeGenOnly) 315 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLto=*/false)) 316 return Error(); 317 318 if (ParallelCodeGenParallelismLevel == 1) { 319 codegen(C, TM.get(), getUncachedOutputWrapper(AddOutput, 0), 0, *Mod); 320 } else { 321 splitCodeGen(C, TM.get(), AddOutput, ParallelCodeGenParallelismLevel, 322 std::move(Mod)); 323 } 324 return Error(); 325 } 326 327 Error lto::thinBackend(Config &Conf, unsigned Task, AddOutputFn AddOutput, 328 Module &Mod, ModuleSummaryIndex &CombinedIndex, 329 const FunctionImporter::ImportMapTy &ImportList, 330 const GVSummaryMapTy &DefinedGlobals, 331 MapVector<StringRef, MemoryBufferRef> &ModuleMap) { 332 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); 333 if (!TOrErr) 334 return TOrErr.takeError(); 335 336 std::unique_ptr<TargetMachine> TM = 337 createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); 338 339 handleAsmUndefinedRefs(Mod, *TM); 340 341 if (Conf.CodeGenOnly) { 342 codegen(Conf, TM.get(), AddOutput, Task, Mod); 343 return Error(); 344 } 345 346 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) 347 return Error(); 348 349 renameModuleForThinLTO(Mod, CombinedIndex); 350 351 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); 352 353 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) 354 return Error(); 355 356 if (!DefinedGlobals.empty()) 357 thinLTOInternalizeModule(Mod, DefinedGlobals); 358 359 if (Conf.PostInternalizeModuleHook && 360 !Conf.PostInternalizeModuleHook(Task, Mod)) 361 return Error(); 362 363 auto ModuleLoader = [&](StringRef Identifier) { 364 assert(Mod.getContext().isODRUniquingDebugTypes() && 365 "ODR Type uniquing should be enabled on the context"); 366 return std::move(getLazyBitcodeModule(MemoryBuffer::getMemBuffer( 367 ModuleMap[Identifier], false), 368 Mod.getContext(), 369 /*ShouldLazyLoadMetadata=*/true) 370 .get()); 371 }; 372 373 FunctionImporter Importer(CombinedIndex, ModuleLoader); 374 Importer.importFunctions(Mod, ImportList); 375 376 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) 377 return Error(); 378 379 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLto=*/true)) 380 return Error(); 381 382 codegen(Conf, TM.get(), AddOutput, Task, Mod); 383 return Error(); 384 } 385