1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Thin Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/LTO/ThinLTOCodeGenerator.h"
16 
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/TargetLibraryInfo.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/Bitcode/BitcodeWriterPass.h"
23 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
24 #include "llvm/IR/LLVMContext.h"
25 #include "llvm/IR/DiagnosticPrinter.h"
26 #include "llvm/IR/LegacyPassManager.h"
27 #include "llvm/IR/Mangler.h"
28 #include "llvm/IRReader/IRReader.h"
29 #include "llvm/Linker/Linker.h"
30 #include "llvm/MC/SubtargetFeature.h"
31 #include "llvm/Object/FunctionIndexObjectFile.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/ThreadPool.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Transforms/IPO.h"
37 #include "llvm/Transforms/IPO/FunctionImport.h"
38 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
39 #include "llvm/Transforms/ObjCARC.h"
40 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
41 
42 using namespace llvm;
43 
44 namespace llvm {
45 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
46 extern cl::opt<bool> LTODiscardValueNames;
47 }
48 
49 namespace {
50 
// Command-line option "threads": the thread count handed to the ThreadPool in
// ThinLTOCodeGenerator::run(). Defaults to std::thread::hardware_concurrency().
// NOTE(review): hardware_concurrency() is allowed to return 0 when the value
// cannot be determined -- confirm how ThreadPool behaves with a count of 0.
static cl::opt<int> ThreadCount("threads",
                                cl::init(std::thread::hardware_concurrency()));
53 
54 static void diagnosticHandler(const DiagnosticInfo &DI) {
55   DiagnosticPrinterRawOStream DP(errs());
56   DI.print(DP);
57   errs() << '\n';
58 }
59 
60 // Simple helper to load a module from bitcode
61 static std::unique_ptr<Module>
62 loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
63                      bool Lazy) {
64   SMDiagnostic Err;
65   ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
66   if (Lazy) {
67     ModuleOrErr =
68         getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
69                              /* ShouldLazyLoadMetadata */ Lazy);
70   } else {
71     ModuleOrErr = parseBitcodeFile(Buffer, Context);
72   }
73   if (std::error_code EC = ModuleOrErr.getError()) {
74     Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
75                        EC.message());
76     Err.print("ThinLTO", errs());
77     report_fatal_error("Can't load module, abort.");
78   }
79   return std::move(ModuleOrErr.get());
80 }
81 
82 // Simple helper to save temporary files for debug.
83 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
84                             unsigned count, StringRef Suffix) {
85   if (TempDir.empty())
86     return;
87   // User asked to save temps, let dump the bitcode file after import.
88   auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
89   std::error_code EC;
90   raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
91   if (EC)
92     report_fatal_error(Twine("Failed to open ") + SaveTempPath +
93                        " to save optimized bitcode\n");
94   WriteBitcodeToFile(&TheModule, OS, true, false);
95 }
96 
97 static StringMap<MemoryBufferRef>
98 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
99   StringMap<MemoryBufferRef> ModuleMap;
100   for (auto &ModuleBuffer : Modules) {
101     assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
102                ModuleMap.end() &&
103            "Expect unique Buffer Identifier");
104     ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
105   }
106   return ModuleMap;
107 }
108 
109 /// Provide a "loader" for the FunctionImporter to access function from other
110 /// modules.
111 class ModuleLoader {
112   /// The context that will be used for importing.
113   LLVMContext &Context;
114 
115   /// Map from Module identifier to MemoryBuffer. Used by clients like the
116   /// FunctionImported to request loading a Module.
117   StringMap<MemoryBufferRef> &ModuleMap;
118 
119 public:
120   ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
121       : Context(Context), ModuleMap(ModuleMap) {}
122 
123   /// Load a module on demand.
124   std::unique_ptr<Module> operator()(StringRef Identifier) {
125     return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
126   }
127 };
128 
129 static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) {
130   if (renameModuleForThinLTO(TheModule, Index))
131     report_fatal_error("renameModuleForThinLTO failed");
132 }
133 
134 static void crossImportIntoModule(Module &TheModule,
135                                   const FunctionInfoIndex &Index,
136                                   StringMap<MemoryBufferRef> &ModuleMap) {
137   ModuleLoader Loader(TheModule.getContext(), ModuleMap);
138   FunctionImporter Importer(Index, Loader);
139   Importer.importFunctions(TheModule);
140 }
141 
142 static void optimizeModule(Module &TheModule, TargetMachine &TM) {
143   // Populate the PassManager
144   PassManagerBuilder PMB;
145   PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
146   PMB.Inliner = createFunctionInliningPass();
147   // FIXME: should get it from the bitcode?
148   PMB.OptLevel = 3;
149   PMB.LoopVectorize = true;
150   PMB.SLPVectorize = true;
151   PMB.VerifyInput = true;
152   PMB.VerifyOutput = false;
153 
154   legacy::PassManager PM;
155 
156   // Add the TTI (required to inform the vectorizer about register size for
157   // instance)
158   PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
159 
160   // Add optimizations
161   PMB.populateThinLTOPassManager(PM);
162   PM.add(createObjCARCContractPass());
163 
164   PM.run(TheModule);
165 }
166 
167 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
168                                             TargetMachine &TM) {
169   SmallVector<char, 128> OutputBuffer;
170 
171   // CodeGen
172   {
173     raw_svector_ostream OS(OutputBuffer);
174     legacy::PassManager PM;
175     if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
176                                /* DisableVerify */ true))
177       report_fatal_error("Failed to setup codegen");
178 
179     // Run codegen now. resulting binary is in OutputBuffer.
180     PM.run(TheModule);
181   }
182   return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
183 }
184 
185 static std::unique_ptr<MemoryBuffer>
186 ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index,
187                      StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
188                      ThinLTOCodeGenerator::CachingOptions CacheOptions,
189                      StringRef SaveTempsDir, unsigned count) {
190 
191   // Save temps: after IPO.
192   saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
193 
194   // "Benchmark"-like optimization: single-source case
195   bool SingleModule = (ModuleMap.size() == 1);
196 
197   if (!SingleModule) {
198     promoteModule(TheModule, Index);
199 
200     // Save temps: after promotion.
201     saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
202 
203     crossImportIntoModule(TheModule, Index, ModuleMap);
204 
205     // Save temps: after cross-module import.
206     saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
207   }
208 
209   optimizeModule(TheModule, TM);
210 
211   saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc");
212 
213   return codegenModule(TheModule, TM);
214 }
215 
216 // Initialize the TargetMachine builder for a given Triple
217 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
218                           const Triple &TheTriple) {
219   // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
220   // FIXME this looks pretty terrible...
221   if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
222     if (TheTriple.getArch() == llvm::Triple::x86_64)
223       TMBuilder.MCpu = "core2";
224     else if (TheTriple.getArch() == llvm::Triple::x86)
225       TMBuilder.MCpu = "yonah";
226     else if (TheTriple.getArch() == llvm::Triple::aarch64)
227       TMBuilder.MCpu = "cyclone";
228   }
229   TMBuilder.TheTriple = std::move(TheTriple);
230 }
231 
232 } // end anonymous namespace
233 
234 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
235   MemoryBufferRef Buffer(Data, Identifier);
236   if (Modules.empty()) {
237     // First module added, so initialize the triple and some options
238     LLVMContext Context;
239     Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
240     initTMBuilder(TMBuilder, Triple(TheTriple));
241   }
242 #ifndef NDEBUG
243   else {
244     LLVMContext Context;
245     assert(TMBuilder.TheTriple.str() ==
246                getBitcodeTargetTriple(Buffer, Context) &&
247            "ThinLTO modules with different triple not supported");
248   }
249 #endif
250   Modules.push_back(Buffer);
251 }
252 
/// Record \p Name in the PreservedSymbols set.
void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
  PreservedSymbols.insert(Name);
}
256 
/// Record \p Name in the CrossReferencedSymbols set.
void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
  CrossReferencedSymbols.insert(Name);
}
260 
261 // TargetMachine factory
262 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
263   std::string ErrMsg;
264   const Target *TheTarget =
265       TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
266   if (!TheTarget) {
267     report_fatal_error("Can't load target for this Triple: " + ErrMsg);
268   }
269 
270   // Use MAttr as the default set of features.
271   SubtargetFeatures Features(MAttr);
272   Features.getDefaultSubtargetFeatures(TheTriple);
273   std::string FeatureStr = Features.getString();
274   return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
275       TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
276       CodeModel::Default, CGOptLevel));
277 }
278 
279 /**
280  * Produce the combined function index from all the bitcode files:
281  * "thin-link".
282  */
283 std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
284   std::unique_ptr<FunctionInfoIndex> CombinedIndex;
285   uint64_t NextModuleId = 0;
286   for (auto &ModuleBuffer : Modules) {
287     ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
288         object::FunctionIndexObjectFile::create(ModuleBuffer, diagnosticHandler,
289                                                 false);
290     if (std::error_code EC = ObjOrErr.getError()) {
291       // FIXME diagnose
292       errs() << "error: can't create FunctionIndexObjectFile for buffer: "
293              << EC.message() << "\n";
294       return nullptr;
295     }
296     auto Index = (*ObjOrErr)->takeIndex();
297     if (CombinedIndex) {
298       CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
299     } else {
300       CombinedIndex = std::move(Index);
301     }
302   }
303   return CombinedIndex;
304 }
305 
/**
 * Perform promotion and renaming of exported internal functions.
 *
 * Thin wrapper that forwards \p TheModule and \p Index to promoteModule(),
 * which aborts with a fatal error if the renaming fails.
 */
void ThinLTOCodeGenerator::promote(Module &TheModule,
                                   FunctionInfoIndex &Index) {
  promoteModule(TheModule, Index);
}
313 
314 /**
315  * Perform cross-module importing for the module identified by ModuleIdentifier.
316  */
317 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
318                                              FunctionInfoIndex &Index) {
319   auto ModuleMap = generateModuleMap(Modules);
320   crossImportIntoModule(TheModule, Index, ModuleMap);
321 }
322 
323 /**
324  * Perform post-importing ThinLTO optimizations.
325  */
326 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
327   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
328   optimizeModule(TheModule, *TMBuilder.create());
329 }
330 
331 /**
332  * Perform ThinLTO CodeGen.
333  */
334 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
335   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
336   return codegenModule(TheModule, *TMBuilder.create());
337 }
338 
339 // Main entry point for the ThinLTO processing
340 void ThinLTOCodeGenerator::run() {
341   // Sequential linking phase
342   auto Index = linkCombinedIndex();
343 
344   // Save temps: index.
345   if (!SaveTempsDir.empty()) {
346     auto SaveTempPath = SaveTempsDir + "index.bc";
347     std::error_code EC;
348     raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
349     if (EC)
350       report_fatal_error(Twine("Failed to open ") + SaveTempPath +
351                          " to save optimized bitcode\n");
352     WriteIndexToFile(*Index, OS);
353   }
354 
355   // Prepare the resulting object vector
356   assert(ProducedBinaries.empty() && "The generator should not be reused");
357   ProducedBinaries.resize(Modules.size());
358 
359   // Prepare the module map.
360   auto ModuleMap = generateModuleMap(Modules);
361 
362   // Parallel optimizer + codegen
363   {
364     ThreadPool Pool(ThreadCount);
365     int count = 0;
366     for (auto &ModuleBuffer : Modules) {
367       Pool.async([&](int count) {
368         LLVMContext Context;
369         Context.setDiscardValueNames(LTODiscardValueNames);
370 
371         // Parse module now
372         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
373 
374         // Save temps: original file.
375         if (!SaveTempsDir.empty()) {
376           saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
377         }
378 
379         ProducedBinaries[count] = ProcessThinLTOModule(
380             *TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions,
381             SaveTempsDir, count);
382       }, count);
383       count++;
384     }
385   }
386 
387   // If statistics were requested, print them out now.
388   if (llvm::AreStatisticsEnabled())
389     llvm::PrintStatistics();
390 }
391