1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the "backend" phase of LTO, i.e. it performs
11 // optimization and code generation on a loaded module. It is generally used
12 // internally by the LTO class but can also be used independently, for example
13 // to implement a standalone ThinLTO backend.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Object/ModuleSymbolTable.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Program.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Transforms/IPO.h"
41 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
42 #include "llvm/Transforms/Scalar/LoopPassManager.h"
43 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
44 #include "llvm/Transforms/Utils/SplitModule.h"
45 
46 using namespace llvm;
47 using namespace lto;
48 
49 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
50   errs() << "failed to open " << Path << ": " << Msg << '\n';
51   errs().flush();
52   exit(1);
53 }
54 
55 Error Config::addSaveTemps(std::string OutputFileName,
56                            bool UseInputModulePath) {
57   ShouldDiscardValueNames = false;
58 
59   std::error_code EC;
60   ResolutionFile = llvm::make_unique<raw_fd_ostream>(
61       OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
62   if (EC)
63     return errorCodeToError(EC);
64 
65   auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
66     // Keep track of the hook provided by the linker, which also needs to run.
67     ModuleHookFn LinkerHook = Hook;
68     Hook = [=](unsigned Task, const Module &M) {
69       // If the linker's hook returned false, we need to pass that result
70       // through.
71       if (LinkerHook && !LinkerHook(Task, M))
72         return false;
73 
74       std::string PathPrefix;
75       // If this is the combined module (not a ThinLTO backend compile) or the
76       // user hasn't requested using the input module's path, emit to a file
77       // named from the provided OutputFileName with the Task ID appended.
78       if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
79         PathPrefix = OutputFileName;
80         if (Task != (unsigned)-1)
81           PathPrefix += utostr(Task) + ".";
82       } else
83         PathPrefix = M.getModuleIdentifier() + ".";
84       std::string Path = PathPrefix + PathSuffix + ".bc";
85       std::error_code EC;
86       raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
87       // Because -save-temps is a debugging feature, we report the error
88       // directly and exit.
89       if (EC)
90         reportOpenError(Path, EC.message());
91       WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false);
92       return true;
93     };
94   };
95 
96   setHook("0.preopt", PreOptModuleHook);
97   setHook("1.promote", PostPromoteModuleHook);
98   setHook("2.internalize", PostInternalizeModuleHook);
99   setHook("3.import", PostImportModuleHook);
100   setHook("4.opt", PostOptModuleHook);
101   setHook("5.precodegen", PreCodeGenModuleHook);
102 
103   CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
104     std::string Path = OutputFileName + "index.bc";
105     std::error_code EC;
106     raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
107     // Because -save-temps is a debugging feature, we report the error
108     // directly and exit.
109     if (EC)
110       reportOpenError(Path, EC.message());
111     WriteIndexToFile(Index, OS);
112 
113     Path = OutputFileName + "index.dot";
114     raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None);
115     if (EC)
116       reportOpenError(Path, EC.message());
117     Index.exportToDot(OSDot);
118     return true;
119   };
120 
121   return Error::success();
122 }
123 
124 namespace {
125 
126 std::unique_ptr<TargetMachine>
127 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
128   StringRef TheTriple = M.getTargetTriple();
129   SubtargetFeatures Features;
130   Features.getDefaultSubtargetFeatures(Triple(TheTriple));
131   for (const std::string &A : Conf.MAttrs)
132     Features.AddFeature(A);
133 
134   Reloc::Model RelocModel;
135   if (Conf.RelocModel)
136     RelocModel = *Conf.RelocModel;
137   else
138     RelocModel =
139         M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
140 
141   return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
142       TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
143       Conf.CodeModel, Conf.CGOptLevel));
144 }
145 
146 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
147                            unsigned OptLevel, bool IsThinLTO) {
148   Optional<PGOOptions> PGOOpt;
149   if (!Conf.SampleProfile.empty())
150     PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
151 
152   PassBuilder PB(TM, PGOOpt);
153   AAManager AA;
154 
155   // Parse a custom AA pipeline if asked to.
156   if (!PB.parseAAPipeline(AA, "default"))
157     report_fatal_error("Error parsing default AA pipeline");
158 
159   LoopAnalysisManager LAM(Conf.DebugPassManager);
160   FunctionAnalysisManager FAM(Conf.DebugPassManager);
161   CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
162   ModuleAnalysisManager MAM(Conf.DebugPassManager);
163 
164   // Register the AA manager first so that our version is the one used.
165   FAM.registerPass([&] { return std::move(AA); });
166 
167   // Register all the basic analyses with the managers.
168   PB.registerModuleAnalyses(MAM);
169   PB.registerCGSCCAnalyses(CGAM);
170   PB.registerFunctionAnalyses(FAM);
171   PB.registerLoopAnalyses(LAM);
172   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
173 
174   ModulePassManager MPM(Conf.DebugPassManager);
175   // FIXME (davide): verify the input.
176 
177   PassBuilder::OptimizationLevel OL;
178 
179   switch (OptLevel) {
180   default:
181     llvm_unreachable("Invalid optimization level");
182   case 0:
183     OL = PassBuilder::O0;
184     break;
185   case 1:
186     OL = PassBuilder::O1;
187     break;
188   case 2:
189     OL = PassBuilder::O2;
190     break;
191   case 3:
192     OL = PassBuilder::O3;
193     break;
194   }
195 
196   if (IsThinLTO)
197     MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager);
198   else
199     MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager);
200   MPM.run(Mod, MAM);
201 
202   // FIXME (davide): verify the output.
203 }
204 
205 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
206                                  std::string PipelineDesc,
207                                  std::string AAPipelineDesc,
208                                  bool DisableVerify) {
209   PassBuilder PB(TM);
210   AAManager AA;
211 
212   // Parse a custom AA pipeline if asked to.
213   if (!AAPipelineDesc.empty())
214     if (!PB.parseAAPipeline(AA, AAPipelineDesc))
215       report_fatal_error("unable to parse AA pipeline description: " +
216                          AAPipelineDesc);
217 
218   LoopAnalysisManager LAM;
219   FunctionAnalysisManager FAM;
220   CGSCCAnalysisManager CGAM;
221   ModuleAnalysisManager MAM;
222 
223   // Register the AA manager first so that our version is the one used.
224   FAM.registerPass([&] { return std::move(AA); });
225 
226   // Register all the basic analyses with the managers.
227   PB.registerModuleAnalyses(MAM);
228   PB.registerCGSCCAnalyses(CGAM);
229   PB.registerFunctionAnalyses(FAM);
230   PB.registerLoopAnalyses(LAM);
231   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
232 
233   ModulePassManager MPM;
234 
235   // Always verify the input.
236   MPM.addPass(VerifierPass());
237 
238   // Now, add all the passes we've been requested to.
239   if (!PB.parsePassPipeline(MPM, PipelineDesc))
240     report_fatal_error("unable to parse pass pipeline description: " +
241                        PipelineDesc);
242 
243   if (!DisableVerify)
244     MPM.addPass(VerifierPass());
245   MPM.run(Mod, MAM);
246 }
247 
248 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
249                            bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
250                            const ModuleSummaryIndex *ImportSummary) {
251   legacy::PassManager passes;
252   passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
253 
254   PassManagerBuilder PMB;
255   PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
256   PMB.Inliner = createFunctionInliningPass();
257   PMB.ExportSummary = ExportSummary;
258   PMB.ImportSummary = ImportSummary;
259   // Unconditionally verify input since it is not verified before this
260   // point and has unknown origin.
261   PMB.VerifyInput = true;
262   PMB.VerifyOutput = !Conf.DisableVerify;
263   PMB.LoopVectorize = true;
264   PMB.SLPVectorize = true;
265   PMB.OptLevel = Conf.OptLevel;
266   PMB.PGOSampleUse = Conf.SampleProfile;
267   if (IsThinLTO)
268     PMB.populateThinLTOPassManager(passes);
269   else
270     PMB.populateLTOPassManager(passes);
271   passes.run(Mod);
272 }
273 
274 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
275          bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
276          const ModuleSummaryIndex *ImportSummary) {
277   // FIXME: Plumb the combined index into the new pass manager.
278   if (!Conf.OptPipeline.empty())
279     runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
280                          Conf.DisableVerify);
281   else if (Conf.UseNewPM)
282     runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO);
283   else
284     runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
285   return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
286 }
287 
288 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
289              unsigned Task, Module &Mod) {
290   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
291     return;
292 
293   std::unique_ptr<ToolOutputFile> DwoOut;
294   SmallString<1024> DwoFile(Conf.DwoPath);
295   if (!Conf.DwoDir.empty()) {
296     std::error_code EC;
297     if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
298       report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
299                          EC.message());
300 
301     DwoFile = Conf.DwoDir;
302     sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
303   }
304 
305   if (!DwoFile.empty()) {
306     std::error_code EC;
307     TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
308     DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
309     if (EC)
310       report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
311   }
312 
313   auto Stream = AddStream(Task);
314   legacy::PassManager CodeGenPasses;
315   if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
316                               DwoOut ? &DwoOut->os() : nullptr,
317                               Conf.CGFileType))
318     report_fatal_error("Failed to setup codegen");
319   CodeGenPasses.run(Mod);
320 
321   if (DwoOut)
322     DwoOut->keep();
323 }
324 
/// Split \p Mod with SplitModule and run codegen() for every partition on a
/// thread pool.
///
/// Both the ThreadPool and SplitModule are given
/// \p ParallelCodeGenParallelismLevel. Each partition is serialized to bitcode
/// in the SplitModule callback, then re-read inside the worker task into a
/// fresh LTOLLVMContext with its own TargetMachine. Only the Target of the
/// \p TM parameter is used; the worker's local TM shadows the parameter.
///
/// A counter incremented once per partition is passed to each task by value
/// and forwarded to codegen() as its Task argument.
///
/// Failure to parse a partition's bitcode is a fatal error.
void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
                  unsigned ParallelCodeGenParallelismLevel,
                  std::unique_ptr<Module> Mod) {
  ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
  // Number of tasks enqueued so far; also the ID given to the next task.
  unsigned ThreadCount = 0;
  const Target *T = &TM->getTarget();

  SplitModule(
      std::move(Mod), ParallelCodeGenParallelismLevel,
      [&](std::unique_ptr<Module> MPart) {
        // We want to clone the module in a new context to multi-thread the
        // codegen. We do it by serializing partition modules to bitcode
        // (while still on the main thread, in order to avoid data races) and
        // spinning up new threads which deserialize the partitions into
        // separate contexts.
        // FIXME: Provide a more direct way to do this in LLVM.
        SmallString<0> BC;
        raw_svector_ostream BCOS(BC);
        WriteBitcodeToFile(*MPart, BCOS);

        // Enqueue the task
        CodegenThreadPool.async(
            // C, T and AddStream are captured by reference; the bitcode
            // buffer and the task ID arrive as arguments instead.
            [&](const SmallString<0> &BC, unsigned ThreadId) {
              LTOLLVMContext Ctx(C);
              Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
                  MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
                  Ctx);
              if (!MOrErr)
                report_fatal_error("Failed to read bitcode");
              std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());

              // A separate TargetMachine is created for this partition; it
              // shadows the TM parameter of splitCodeGen.
              std::unique_ptr<TargetMachine> TM =
                  createTargetMachine(C, T, *MPartInCtx);

              codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
            },
            // Pass BC using std::move to ensure that it get moved rather than
            // copied into the thread's context.
            std::move(BC), ThreadCount++);
      },
      false);

  // Because the inner lambda (which runs in a worker thread) captures our local
  // variables, we need to wait for the worker threads to terminate before we
  // can leave the function scope.
  CodegenThreadPool.wait();
}
372 
373 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
374   if (!C.OverrideTriple.empty())
375     Mod.setTargetTriple(C.OverrideTriple);
376   else if (Mod.getTargetTriple().empty())
377     Mod.setTargetTriple(C.DefaultTriple);
378 
379   std::string Msg;
380   const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
381   if (!T)
382     return make_error<StringError>(Msg, inconvertibleErrorCode());
383   return T;
384 }
385 
386 }
387 
388 static Error
389 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
390   // Make sure we flush the diagnostic remarks file in case the linker doesn't
391   // call the global destructors before exiting.
392   if (!DiagOutputFile)
393     return Error::success();
394   DiagOutputFile->keep();
395   DiagOutputFile->os().flush();
396   return Error::success();
397 }
398 
399 Error lto::backend(Config &C, AddStreamFn AddStream,
400                    unsigned ParallelCodeGenParallelismLevel,
401                    std::unique_ptr<Module> Mod,
402                    ModuleSummaryIndex &CombinedIndex) {
403   Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
404   if (!TOrErr)
405     return TOrErr.takeError();
406 
407   std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
408 
409   // Setup optimization remarks.
410   auto DiagFileOrErr = lto::setupOptimizationRemarks(
411       Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
412   if (!DiagFileOrErr)
413     return DiagFileOrErr.takeError();
414   auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
415 
416   if (!C.CodeGenOnly) {
417     if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
418              /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
419       return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
420   }
421 
422   if (ParallelCodeGenParallelismLevel == 1) {
423     codegen(C, TM.get(), AddStream, 0, *Mod);
424   } else {
425     splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
426                  std::move(Mod));
427   }
428   return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
429 }
430 
431 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
432                             const ModuleSummaryIndex &Index) {
433   std::vector<GlobalValue*> DeadGVs;
434   for (auto &GV : Mod.global_values())
435     if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID()))
436       if (!Index.isGlobalValueLive(GVS)) {
437         DeadGVs.push_back(&GV);
438         convertToDeclaration(GV);
439       }
440 
441   // Now that all dead bodies have been dropped, delete the actual objects
442   // themselves when possible.
443   for (GlobalValue *GV : DeadGVs) {
444     GV->removeDeadConstantUsers();
445     // Might reference something defined in native object (i.e. dropped a
446     // non-prevailing IR def, but we need to keep the declaration).
447     if (GV->use_empty())
448       GV->eraseFromParent();
449   }
450 }
451 
452 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
453                        Module &Mod, const ModuleSummaryIndex &CombinedIndex,
454                        const FunctionImporter::ImportMapTy &ImportList,
455                        const GVSummaryMapTy &DefinedGlobals,
456                        MapVector<StringRef, BitcodeModule> &ModuleMap) {
457   Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
458   if (!TOrErr)
459     return TOrErr.takeError();
460 
461   std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
462 
463   // Setup optimization remarks.
464   auto DiagFileOrErr = lto::setupOptimizationRemarks(
465       Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
466   if (!DiagFileOrErr)
467     return DiagFileOrErr.takeError();
468   auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
469 
470   if (Conf.CodeGenOnly) {
471     codegen(Conf, TM.get(), AddStream, Task, Mod);
472     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
473   }
474 
475   if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
476     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
477 
478   renameModuleForThinLTO(Mod, CombinedIndex);
479 
480   dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
481 
482   thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
483 
484   if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
485     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
486 
487   if (!DefinedGlobals.empty())
488     thinLTOInternalizeModule(Mod, DefinedGlobals);
489 
490   if (Conf.PostInternalizeModuleHook &&
491       !Conf.PostInternalizeModuleHook(Task, Mod))
492     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
493 
494   auto ModuleLoader = [&](StringRef Identifier) {
495     assert(Mod.getContext().isODRUniquingDebugTypes() &&
496            "ODR Type uniquing should be enabled on the context");
497     auto I = ModuleMap.find(Identifier);
498     assert(I != ModuleMap.end());
499     return I->second.getLazyModule(Mod.getContext(),
500                                    /*ShouldLazyLoadMetadata=*/true,
501                                    /*IsImporting*/ true);
502   };
503 
504   FunctionImporter Importer(CombinedIndex, ModuleLoader);
505   if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
506     return Err;
507 
508   if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
509     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
510 
511   if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
512            /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
513     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
514 
515   codegen(Conf, TM.get(), AddStream, Task, Mod);
516   return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
517 }
518