1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the "backend" phase of LTO, i.e. it performs
11 // optimization and code generation on a loaded module. It is generally used
12 // internally by the LTO class but can also be used independently, for example
13 // to implement a standalone ThinLTO backend.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/LoopPassManager.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/Bitcode/ReaderWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Passes/PassBuilder.h"
30 #include "llvm/Support/Error.h"
31 #include "llvm/Support/FileSystem.h"
32 #include "llvm/Support/TargetRegistry.h"
33 #include "llvm/Support/ThreadPool.h"
34 #include "llvm/Target/TargetMachine.h"
35 #include "llvm/Transforms/IPO.h"
36 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
37 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
38 #include "llvm/Transforms/Utils/SplitModule.h"
39 
40 using namespace llvm;
41 using namespace lto;
42 
43 Error Config::addSaveTemps(std::string OutputFileName,
44                            bool UseInputModulePath) {
45   ShouldDiscardValueNames = false;
46 
47   std::error_code EC;
48   ResolutionFile = llvm::make_unique<raw_fd_ostream>(
49       OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
50   if (EC)
51     return errorCodeToError(EC);
52 
53   auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
54     // Keep track of the hook provided by the linker, which also needs to run.
55     ModuleHookFn LinkerHook = Hook;
56     Hook = [=](unsigned Task, const Module &M) {
57       // If the linker's hook returned false, we need to pass that result
58       // through.
59       if (LinkerHook && !LinkerHook(Task, M))
60         return false;
61 
62       std::string PathPrefix;
63       // If this is the combined module (not a ThinLTO backend compile) or the
64       // user hasn't requested using the input module's path, emit to a file
65       // named from the provided OutputFileName with the Task ID appended.
66       if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
67         PathPrefix = OutputFileName + utostr(Task);
68       } else
69         PathPrefix = M.getModuleIdentifier();
70       std::string Path = PathPrefix + "." + PathSuffix + ".bc";
71       std::error_code EC;
72       raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
73       if (EC) {
74         // Because -save-temps is a debugging feature, we report the error
75         // directly and exit.
76         llvm::errs() << "failed to open " << Path << ": " << EC.message()
77                      << '\n';
78         exit(1);
79       }
80       WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false);
81       return true;
82     };
83   };
84 
85   setHook("0.preopt", PreOptModuleHook);
86   setHook("1.promote", PostPromoteModuleHook);
87   setHook("2.internalize", PostInternalizeModuleHook);
88   setHook("3.import", PostImportModuleHook);
89   setHook("4.opt", PostOptModuleHook);
90   setHook("5.precodegen", PreCodeGenModuleHook);
91 
92   CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
93     std::string Path = OutputFileName + "index.bc";
94     std::error_code EC;
95     raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
96     if (EC) {
97       // Because -save-temps is a debugging feature, we report the error
98       // directly and exit.
99       llvm::errs() << "failed to open " << Path << ": " << EC.message() << '\n';
100       exit(1);
101     }
102     WriteIndexToFile(Index, OS);
103     return true;
104   };
105 
106   return Error();
107 }
108 
109 namespace {
110 
111 std::unique_ptr<TargetMachine>
112 createTargetMachine(Config &Conf, StringRef TheTriple,
113                     const Target *TheTarget) {
114   SubtargetFeatures Features;
115   Features.getDefaultSubtargetFeatures(Triple(TheTriple));
116   for (const std::string &A : Conf.MAttrs)
117     Features.AddFeature(A);
118 
119   return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
120       TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel,
121       Conf.CodeModel, Conf.CGOptLevel));
122 }
123 
124 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
125                                  std::string PipelineDesc,
126                                  bool DisableVerify) {
127   PassBuilder PB(TM);
128   AAManager AA;
129   LoopAnalysisManager LAM;
130   FunctionAnalysisManager FAM;
131   CGSCCAnalysisManager CGAM;
132   ModuleAnalysisManager MAM;
133 
134   // Register the AA manager first so that our version is the one used.
135   FAM.registerPass([&] { return std::move(AA); });
136 
137   // Register all the basic analyses with the managers.
138   PB.registerModuleAnalyses(MAM);
139   PB.registerCGSCCAnalyses(CGAM);
140   PB.registerFunctionAnalyses(FAM);
141   PB.registerLoopAnalyses(LAM);
142   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
143 
144   ModulePassManager MPM;
145 
146   // Always verify the input.
147   MPM.addPass(VerifierPass());
148 
149   // Now, add all the passes we've been requested to.
150   if (!PB.parsePassPipeline(MPM, PipelineDesc))
151     report_fatal_error("unable to parse pass pipeline description: " +
152                        PipelineDesc);
153 
154   if (!DisableVerify)
155     MPM.addPass(VerifierPass());
156   MPM.run(Mod, MAM);
157 }
158 
159 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
160                            bool IsThinLto) {
161   legacy::PassManager passes;
162   passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
163 
164   PassManagerBuilder PMB;
165   PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
166   PMB.Inliner = createFunctionInliningPass();
167   // Unconditionally verify input since it is not verified before this
168   // point and has unknown origin.
169   PMB.VerifyInput = true;
170   PMB.VerifyOutput = !Conf.DisableVerify;
171   PMB.LoopVectorize = true;
172   PMB.SLPVectorize = true;
173   PMB.OptLevel = Conf.OptLevel;
174   if (IsThinLto)
175     PMB.populateThinLTOPassManager(passes);
176   else
177     PMB.populateLTOPassManager(passes);
178   passes.run(Mod);
179 }
180 
181 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
182          bool IsThinLto) {
183   Mod.setDataLayout(TM->createDataLayout());
184   if (Conf.OptPipeline.empty())
185     runOldPMPasses(Conf, Mod, TM, IsThinLto);
186   else
187     runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.DisableVerify);
188   return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
189 }
190 
191 /// Monolithic LTO does not support caching (yet), this is a convenient wrapper
192 /// around AddOutput to workaround this.
193 static AddOutputFn getUncachedOutputWrapper(AddOutputFn &AddOutput,
194                                             unsigned Task) {
195   return [Task, &AddOutput](unsigned TaskId) {
196     auto Output = AddOutput(Task);
197     if (Output->isCachingEnabled() && Output->tryLoadFromCache(""))
198       report_fatal_error("Cache hit without a valid key?");
199     assert(Task == TaskId && "Unexpexted TaskId mismatch");
200     return Output;
201   };
202 }
203 
204 void codegen(Config &Conf, TargetMachine *TM, AddOutputFn AddOutput,
205              unsigned Task, Module &Mod) {
206   if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
207     return;
208 
209   auto Output = AddOutput(Task);
210   std::unique_ptr<raw_pwrite_stream> OS = Output->getStream();
211   legacy::PassManager CodeGenPasses;
212   if (TM->addPassesToEmitFile(CodeGenPasses, *OS,
213                               TargetMachine::CGFT_ObjectFile))
214     report_fatal_error("Failed to setup codegen");
215   CodeGenPasses.run(Mod);
216 }
217 
218 void splitCodeGen(Config &C, TargetMachine *TM, AddOutputFn AddOutput,
219                   unsigned ParallelCodeGenParallelismLevel,
220                   std::unique_ptr<Module> Mod) {
221   ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
222   unsigned ThreadCount = 0;
223   const Target *T = &TM->getTarget();
224 
225   SplitModule(
226       std::move(Mod), ParallelCodeGenParallelismLevel,
227       [&](std::unique_ptr<Module> MPart) {
228         // We want to clone the module in a new context to multi-thread the
229         // codegen. We do it by serializing partition modules to bitcode
230         // (while still on the main thread, in order to avoid data races) and
231         // spinning up new threads which deserialize the partitions into
232         // separate contexts.
233         // FIXME: Provide a more direct way to do this in LLVM.
234         SmallString<0> BC;
235         raw_svector_ostream BCOS(BC);
236         WriteBitcodeToFile(MPart.get(), BCOS);
237 
238         // Enqueue the task
239         CodegenThreadPool.async(
240             [&](const SmallString<0> &BC, unsigned ThreadId) {
241               LTOLLVMContext Ctx(C);
242               ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
243                   MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
244                   Ctx);
245               if (!MOrErr)
246                 report_fatal_error("Failed to read bitcode");
247               std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
248 
249               std::unique_ptr<TargetMachine> TM =
250                   createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
251 
252               codegen(C, TM.get(),
253                       getUncachedOutputWrapper(AddOutput, ThreadId), ThreadId,
254                       *MPartInCtx);
255             },
256             // Pass BC using std::move to ensure that it get moved rather than
257             // copied into the thread's context.
258             std::move(BC), ThreadCount++);
259       },
260       false);
261 }
262 
263 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
264   if (!C.OverrideTriple.empty())
265     Mod.setTargetTriple(C.OverrideTriple);
266   else if (Mod.getTargetTriple().empty())
267     Mod.setTargetTriple(C.DefaultTriple);
268 
269   std::string Msg;
270   const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
271   if (!T)
272     return make_error<StringError>(Msg, inconvertibleErrorCode());
273   return T;
274 }
275 
276 }
277 
278 Error lto::backend(Config &C, AddOutputFn AddOutput,
279                    unsigned ParallelCodeGenParallelismLevel,
280                    std::unique_ptr<Module> Mod) {
281   Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
282   if (!TOrErr)
283     return TOrErr.takeError();
284 
285   std::unique_ptr<TargetMachine> TM =
286       createTargetMachine(C, Mod->getTargetTriple(), *TOrErr);
287 
288   if (!C.CodeGenOnly)
289     if (!opt(C, TM.get(), 0, *Mod, /*IsThinLto=*/false))
290       return Error();
291 
292   if (ParallelCodeGenParallelismLevel == 1) {
293     codegen(C, TM.get(), getUncachedOutputWrapper(AddOutput, 0), 0, *Mod);
294   } else {
295     splitCodeGen(C, TM.get(), AddOutput, ParallelCodeGenParallelismLevel,
296                  std::move(Mod));
297   }
298   return Error();
299 }
300 
301 Error lto::thinBackend(Config &Conf, unsigned Task, AddOutputFn AddOutput,
302                        Module &Mod, ModuleSummaryIndex &CombinedIndex,
303                        const FunctionImporter::ImportMapTy &ImportList,
304                        const GVSummaryMapTy &DefinedGlobals,
305                        MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
306   Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
307   if (!TOrErr)
308     return TOrErr.takeError();
309 
310   std::unique_ptr<TargetMachine> TM =
311       createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr);
312 
313   if (Conf.CodeGenOnly) {
314     codegen(Conf, TM.get(), AddOutput, Task, Mod);
315     return Error();
316   }
317 
318   if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
319     return Error();
320 
321   renameModuleForThinLTO(Mod, CombinedIndex);
322 
323   thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
324 
325   if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
326     return Error();
327 
328   if (!DefinedGlobals.empty())
329     thinLTOInternalizeModule(Mod, DefinedGlobals);
330 
331   if (Conf.PostInternalizeModuleHook &&
332       !Conf.PostInternalizeModuleHook(Task, Mod))
333     return Error();
334 
335   auto ModuleLoader = [&](StringRef Identifier) {
336     assert(Mod.getContext().isODRUniquingDebugTypes() &&
337            "ODR Type uniquing shoudl be enabled on the context");
338     return std::move(getLazyBitcodeModule(MemoryBuffer::getMemBuffer(
339                                               ModuleMap[Identifier], false),
340                                           Mod.getContext(),
341                                           /*ShouldLazyLoadMetadata=*/true)
342                          .get());
343   };
344 
345   FunctionImporter Importer(CombinedIndex, ModuleLoader);
346   Importer.importFunctions(Mod, ImportList);
347 
348   if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
349     return Error();
350 
351   if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLto=*/true))
352     return Error();
353 
354   codegen(Conf, TM.get(), AddOutput, Task, Mod);
355   return Error();
356 }
357