1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the "backend" phase of LTO, i.e. it performs 11 // optimization and code generation on a loaded module. It is generally used 12 // internally by the LTO class but can also be used independently, for example 13 // to implement a standalone ThinLTO backend. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/LTO/LTOBackend.h" 18 #include "llvm/Analysis/AliasAnalysis.h" 19 #include "llvm/Analysis/CGSCCPassManager.h" 20 #include "llvm/Analysis/TargetLibraryInfo.h" 21 #include "llvm/Analysis/TargetTransformInfo.h" 22 #include "llvm/Bitcode/BitcodeReader.h" 23 #include "llvm/Bitcode/BitcodeWriter.h" 24 #include "llvm/IR/LegacyPassManager.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/IR/Verifier.h" 27 #include "llvm/LTO/LTO.h" 28 #include "llvm/MC/SubtargetFeature.h" 29 #include "llvm/Object/ModuleSymbolTable.h" 30 #include "llvm/Passes/PassBuilder.h" 31 #include "llvm/Support/Error.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Support/Path.h" 35 #include "llvm/Support/Program.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Support/TargetRegistry.h" 38 #include "llvm/Support/ThreadPool.h" 39 #include "llvm/Target/TargetMachine.h" 40 #include "llvm/Transforms/IPO.h" 41 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 42 #include "llvm/Transforms/Scalar/LoopPassManager.h" 43 #include "llvm/Transforms/Utils/FunctionImportUtils.h" 44 #include "llvm/Transforms/Utils/SplitModule.h" 45 46 using namespace llvm; 47 using namespace lto; 48 49 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { 50 errs() << "failed to open " << Path << ": " << Msg << '\n'; 51 errs().flush(); 52 exit(1); 53 } 54 55 Error Config::addSaveTemps(std::string OutputFileName, 56 bool UseInputModulePath) { 57 ShouldDiscardValueNames = false; 58 59 std::error_code EC; 60 ResolutionFile = llvm::make_unique<raw_fd_ostream>( 61 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text); 62 if (EC) 63 return errorCodeToError(EC); 64 65 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) { 66 // Keep track of the hook provided by the linker, which also needs to run. 67 ModuleHookFn LinkerHook = Hook; 68 Hook = [=](unsigned Task, const Module &M) { 69 // If the linker's hook returned false, we need to pass that result 70 // through. 71 if (LinkerHook && !LinkerHook(Task, M)) 72 return false; 73 74 std::string PathPrefix; 75 // If this is the combined module (not a ThinLTO backend compile) or the 76 // user hasn't requested using the input module's path, emit to a file 77 // named from the provided OutputFileName with the Task ID appended. 78 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) { 79 PathPrefix = OutputFileName + utostr(Task); 80 } else 81 PathPrefix = M.getModuleIdentifier(); 82 std::string Path = PathPrefix + "." + PathSuffix + ".bc"; 83 std::error_code EC; 84 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); 85 // Because -save-temps is a debugging feature, we report the error 86 // directly and exit. 87 if (EC) 88 reportOpenError(Path, EC.message()); 89 WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false); 90 return true; 91 }; 92 }; 93 94 setHook("0.preopt", PreOptModuleHook); 95 setHook("1.promote", PostPromoteModuleHook); 96 setHook("2.internalize", PostInternalizeModuleHook); 97 setHook("3.import", PostImportModuleHook); 98 setHook("4.opt", PostOptModuleHook); 99 setHook("5.precodegen", PreCodeGenModuleHook); 100 101 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) { 102 std::string Path = OutputFileName + "index.bc"; 103 std::error_code EC; 104 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None); 105 // Because -save-temps is a debugging feature, we report the error 106 // directly and exit. 107 if (EC) 108 reportOpenError(Path, EC.message()); 109 WriteIndexToFile(Index, OS); 110 111 Path = OutputFileName + "index.dot"; 112 raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None); 113 if (EC) 114 reportOpenError(Path, EC.message()); 115 Index.exportToDot(OSDot); 116 return true; 117 }; 118 119 return Error::success(); 120 } 121 122 namespace { 123 124 std::unique_ptr<TargetMachine> 125 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) { 126 StringRef TheTriple = M.getTargetTriple(); 127 SubtargetFeatures Features; 128 Features.getDefaultSubtargetFeatures(Triple(TheTriple)); 129 for (const std::string &A : Conf.MAttrs) 130 Features.AddFeature(A); 131 132 Reloc::Model RelocModel; 133 if (Conf.RelocModel) 134 RelocModel = *Conf.RelocModel; 135 else 136 RelocModel = 137 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; 138 139 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( 140 TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, 141 Conf.CodeModel, Conf.CGOptLevel)); 142 } 143 144 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, 145 unsigned OptLevel, bool IsThinLTO) { 146 Optional<PGOOptions> PGOOpt; 147 if (!Conf.SampleProfile.empty()) 148 PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true); 149 150 PassBuilder PB(TM, PGOOpt); 151 AAManager AA; 152 153 // Parse a custom AA pipeline if asked to. 154 if (!PB.parseAAPipeline(AA, "default")) 155 report_fatal_error("Error parsing default AA pipeline"); 156 157 LoopAnalysisManager LAM(Conf.DebugPassManager); 158 FunctionAnalysisManager FAM(Conf.DebugPassManager); 159 CGSCCAnalysisManager CGAM(Conf.DebugPassManager); 160 ModuleAnalysisManager MAM(Conf.DebugPassManager); 161 162 // Register the AA manager first so that our version is the one used. 163 FAM.registerPass([&] { return std::move(AA); }); 164 165 // Register all the basic analyses with the managers. 166 PB.registerModuleAnalyses(MAM); 167 PB.registerCGSCCAnalyses(CGAM); 168 PB.registerFunctionAnalyses(FAM); 169 PB.registerLoopAnalyses(LAM); 170 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); 171 172 ModulePassManager MPM(Conf.DebugPassManager); 173 // FIXME (davide): verify the input. 174 175 PassBuilder::OptimizationLevel OL; 176 177 switch (OptLevel) { 178 default: 179 llvm_unreachable("Invalid optimization level"); 180 case 0: 181 OL = PassBuilder::O0; 182 break; 183 case 1: 184 OL = PassBuilder::O1; 185 break; 186 case 2: 187 OL = PassBuilder::O2; 188 break; 189 case 3: 190 OL = PassBuilder::O3; 191 break; 192 } 193 194 if (IsThinLTO) 195 MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager); 196 else 197 MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager); 198 MPM.run(Mod, MAM); 199 200 // FIXME (davide): verify the output. 201 } 202 203 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM, 204 std::string PipelineDesc, 205 std::string AAPipelineDesc, 206 bool DisableVerify) { 207 PassBuilder PB(TM); 208 AAManager AA; 209 210 // Parse a custom AA pipeline if asked to. 211 if (!AAPipelineDesc.empty()) 212 if (!PB.parseAAPipeline(AA, AAPipelineDesc)) 213 report_fatal_error("unable to parse AA pipeline description: " + 214 AAPipelineDesc); 215 216 LoopAnalysisManager LAM; 217 FunctionAnalysisManager FAM; 218 CGSCCAnalysisManager CGAM; 219 ModuleAnalysisManager MAM; 220 221 // Register the AA manager first so that our version is the one used. 222 FAM.registerPass([&] { return std::move(AA); }); 223 224 // Register all the basic analyses with the managers. 225 PB.registerModuleAnalyses(MAM); 226 PB.registerCGSCCAnalyses(CGAM); 227 PB.registerFunctionAnalyses(FAM); 228 PB.registerLoopAnalyses(LAM); 229 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); 230 231 ModulePassManager MPM; 232 233 // Always verify the input. 234 MPM.addPass(VerifierPass()); 235 236 // Now, add all the passes we've been requested to. 237 if (!PB.parsePassPipeline(MPM, PipelineDesc)) 238 report_fatal_error("unable to parse pass pipeline description: " + 239 PipelineDesc); 240 241 if (!DisableVerify) 242 MPM.addPass(VerifierPass()); 243 MPM.run(Mod, MAM); 244 } 245 246 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, 247 bool IsThinLTO, ModuleSummaryIndex *ExportSummary, 248 const ModuleSummaryIndex *ImportSummary) { 249 legacy::PassManager passes; 250 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); 251 252 PassManagerBuilder PMB; 253 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple())); 254 PMB.Inliner = createFunctionInliningPass(); 255 PMB.ExportSummary = ExportSummary; 256 PMB.ImportSummary = ImportSummary; 257 // Unconditionally verify input since it is not verified before this 258 // point and has unknown origin. 259 PMB.VerifyInput = true; 260 PMB.VerifyOutput = !Conf.DisableVerify; 261 PMB.LoopVectorize = true; 262 PMB.SLPVectorize = true; 263 PMB.OptLevel = Conf.OptLevel; 264 PMB.PGOSampleUse = Conf.SampleProfile; 265 if (IsThinLTO) 266 PMB.populateThinLTOPassManager(passes); 267 else 268 PMB.populateLTOPassManager(passes); 269 passes.run(Mod); 270 } 271 272 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, 273 bool IsThinLTO, ModuleSummaryIndex *ExportSummary, 274 const ModuleSummaryIndex *ImportSummary) { 275 // FIXME: Plumb the combined index into the new pass manager. 276 if (!Conf.OptPipeline.empty()) 277 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, 278 Conf.DisableVerify); 279 else if (Conf.UseNewPM) 280 runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO); 281 else 282 runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); 283 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); 284 } 285 286 void codegenWithSplitDwarf(Config &Conf, TargetMachine *TM, 287 AddStreamFn AddStream, unsigned Task, Module &Mod) { 288 SmallString<128> TempFile; 289 int FD = -1; 290 if (auto EC = 291 sys::fs::createTemporaryFile("lto-llvm-fission", "o", FD, TempFile)) 292 report_fatal_error("Could not create temporary file " + 293 TempFile.str() + ": " + EC.message()); 294 llvm::raw_fd_ostream OS(FD, true); 295 SmallString<1024> DwarfFile(Conf.DwoDir); 296 std::string DwoName = sys::path::filename(Mod.getModuleIdentifier()).str() + 297 "-" + std::to_string(Task) + "-"; 298 size_t index = TempFile.str().rfind("lto-llvm-fission"); 299 StringRef TempID = TempFile.str().substr(index + 17, 6); 300 DwoName += TempID.str() + ".dwo"; 301 sys::path::append(DwarfFile, DwoName); 302 TM->Options.MCOptions.SplitDwarfFile = DwarfFile.str().str(); 303 304 legacy::PassManager CodeGenPasses; 305 if (TM->addPassesToEmitFile(CodeGenPasses, OS, Conf.CGFileType)) 306 report_fatal_error("Failed to setup codegen"); 307 CodeGenPasses.run(Mod); 308 309 if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir)) 310 report_fatal_error("Failed to create directory " + 311 Conf.DwoDir + ": " + EC.message()); 312 313 SmallVector<const char*, 5> ExtractArgs, StripArgs; 314 ExtractArgs.push_back(Conf.Objcopy.c_str()); 315 ExtractArgs.push_back("--extract-dwo"); 316 ExtractArgs.push_back(TempFile.c_str()); 317 ExtractArgs.push_back(TM->Options.MCOptions.SplitDwarfFile.c_str()); 318 ExtractArgs.push_back(nullptr); 319 StripArgs.push_back(Conf.Objcopy.c_str()); 320 StripArgs.push_back("--strip-dwo"); 321 StripArgs.push_back(TempFile.c_str()); 322 StripArgs.push_back(nullptr); 323 324 if (auto Ret = sys::ExecuteAndWait(Conf.Objcopy, ExtractArgs.data())) { 325 report_fatal_error("Failed to extract dwo from " + TempFile.str() + 326 ". Exit code " + std::to_string(Ret)); 327 } 328 if (auto Ret = sys::ExecuteAndWait(Conf.Objcopy, StripArgs.data())) { 329 report_fatal_error("Failed to strip dwo from " + TempFile.str() + 330 ". Exit code " + std::to_string(Ret)); 331 } 332 333 auto Stream = AddStream(Task); 334 auto Buffer = MemoryBuffer::getFile(TempFile); 335 if (auto EC = Buffer.getError()) 336 report_fatal_error("Failed to load file " + 337 TempFile.str() + ": " + EC.message()); 338 *Stream->OS << Buffer.get()->getBuffer(); 339 if (auto EC = sys::fs::remove(TempFile)) 340 report_fatal_error("Failed to delete file " + 341 TempFile.str() + ": " + EC.message()); 342 } 343 344 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, 345 unsigned Task, Module &Mod) { 346 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) 347 return; 348 349 if (!Conf.DwoDir.empty()) { 350 codegenWithSplitDwarf(Conf, TM, AddStream, Task, Mod); 351 return; 352 } 353 354 auto Stream = AddStream(Task); 355 legacy::PassManager CodeGenPasses; 356 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, Conf.CGFileType)) 357 report_fatal_error("Failed to setup codegen"); 358 CodeGenPasses.run(Mod); 359 } 360 361 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, 362 unsigned ParallelCodeGenParallelismLevel, 363 std::unique_ptr<Module> Mod) { 364 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel); 365 unsigned ThreadCount = 0; 366 const Target *T = &TM->getTarget(); 367 368 SplitModule( 369 std::move(Mod), ParallelCodeGenParallelismLevel, 370 [&](std::unique_ptr<Module> MPart) { 371 // We want to clone the module in a new context to multi-thread the 372 // codegen. We do it by serializing partition modules to bitcode 373 // (while still on the main thread, in order to avoid data races) and 374 // spinning up new threads which deserialize the partitions into 375 // separate contexts. 376 // FIXME: Provide a more direct way to do this in LLVM. 377 SmallString<0> BC; 378 raw_svector_ostream BCOS(BC); 379 WriteBitcodeToFile(*MPart, BCOS); 380 381 // Enqueue the task 382 CodegenThreadPool.async( 383 [&](const SmallString<0> &BC, unsigned ThreadId) { 384 LTOLLVMContext Ctx(C); 385 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile( 386 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"), 387 Ctx); 388 if (!MOrErr) 389 report_fatal_error("Failed to read bitcode"); 390 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); 391 392 std::unique_ptr<TargetMachine> TM = 393 createTargetMachine(C, T, *MPartInCtx); 394 395 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); 396 }, 397 // Pass BC using std::move to ensure that it get moved rather than 398 // copied into the thread's context. 399 std::move(BC), ThreadCount++); 400 }, 401 false); 402 403 // Because the inner lambda (which runs in a worker thread) captures our local 404 // variables, we need to wait for the worker threads to terminate before we 405 // can leave the function scope. 406 CodegenThreadPool.wait(); 407 } 408 409 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) { 410 if (!C.OverrideTriple.empty()) 411 Mod.setTargetTriple(C.OverrideTriple); 412 else if (Mod.getTargetTriple().empty()) 413 Mod.setTargetTriple(C.DefaultTriple); 414 415 std::string Msg; 416 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg); 417 if (!T) 418 return make_error<StringError>(Msg, inconvertibleErrorCode()); 419 return T; 420 } 421 422 } 423 424 static void 425 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) { 426 // Make sure we flush the diagnostic remarks file in case the linker doesn't 427 // call the global destructors before exiting. 428 if (!DiagOutputFile) 429 return; 430 DiagOutputFile->keep(); 431 DiagOutputFile->os().flush(); 432 } 433 434 Error lto::backend(Config &C, AddStreamFn AddStream, 435 unsigned ParallelCodeGenParallelismLevel, 436 std::unique_ptr<Module> Mod, 437 ModuleSummaryIndex &CombinedIndex) { 438 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod); 439 if (!TOrErr) 440 return TOrErr.takeError(); 441 442 std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod); 443 444 // Setup optimization remarks. 445 auto DiagFileOrErr = lto::setupOptimizationRemarks( 446 Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness); 447 if (!DiagFileOrErr) 448 return DiagFileOrErr.takeError(); 449 auto DiagnosticOutputFile = std::move(*DiagFileOrErr); 450 451 if (!C.CodeGenOnly) { 452 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, 453 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) { 454 finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); 455 return Error::success(); 456 } 457 } 458 459 if (ParallelCodeGenParallelismLevel == 1) { 460 codegen(C, TM.get(), AddStream, 0, *Mod); 461 } else { 462 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, 463 std::move(Mod)); 464 } 465 finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); 466 return Error::success(); 467 } 468 469 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, 470 const ModuleSummaryIndex &Index) { 471 std::vector<GlobalValue*> DeadGVs; 472 for (auto &GV : Mod.global_values()) 473 if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID())) 474 if (!Index.isGlobalValueLive(GVS)) { 475 DeadGVs.push_back(&GV); 476 convertToDeclaration(GV); 477 } 478 479 // Now that all dead bodies have been dropped, delete the actual objects 480 // themselves when possible. 481 for (GlobalValue *GV : DeadGVs) { 482 GV->removeDeadConstantUsers(); 483 // Might reference something defined in native object (i.e. dropped a 484 // non-prevailing IR def, but we need to keep the declaration). 485 if (GV->use_empty()) 486 GV->eraseFromParent(); 487 } 488 } 489 490 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, 491 Module &Mod, const ModuleSummaryIndex &CombinedIndex, 492 const FunctionImporter::ImportMapTy &ImportList, 493 const GVSummaryMapTy &DefinedGlobals, 494 MapVector<StringRef, BitcodeModule> &ModuleMap) { 495 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); 496 if (!TOrErr) 497 return TOrErr.takeError(); 498 499 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod); 500 501 if (Conf.CodeGenOnly) { 502 codegen(Conf, TM.get(), AddStream, Task, Mod); 503 return Error::success(); 504 } 505 506 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod)) 507 return Error::success(); 508 509 renameModuleForThinLTO(Mod, CombinedIndex); 510 511 dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex); 512 513 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals); 514 515 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod)) 516 return Error::success(); 517 518 if (!DefinedGlobals.empty()) 519 thinLTOInternalizeModule(Mod, DefinedGlobals); 520 521 if (Conf.PostInternalizeModuleHook && 522 !Conf.PostInternalizeModuleHook(Task, Mod)) 523 return Error::success(); 524 525 auto ModuleLoader = [&](StringRef Identifier) { 526 assert(Mod.getContext().isODRUniquingDebugTypes() && 527 "ODR Type uniquing should be enabled on the context"); 528 auto I = ModuleMap.find(Identifier); 529 assert(I != ModuleMap.end()); 530 return I->second.getLazyModule(Mod.getContext(), 531 /*ShouldLazyLoadMetadata=*/true, 532 /*IsImporting*/ true); 533 }; 534 535 FunctionImporter Importer(CombinedIndex, ModuleLoader); 536 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError()) 537 return Err; 538 539 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) 540 return Error::success(); 541 542 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, 543 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) 544 return Error::success(); 545 546 codegen(Conf, TM.get(), AddStream, Task, Mod); 547 return Error::success(); 548 } 549