1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Thin Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/LTO/ThinLTOCodeGenerator.h"
16 
17 #ifdef HAVE_LLVM_REVISION
18 #include "LLVMLTORevision.h"
19 #endif
20 
21 #include "UpdateCompilerUsed.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
25 #include "llvm/Analysis/TargetLibraryInfo.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Bitcode/BitcodeWriterPass.h"
28 #include "llvm/Bitcode/ReaderWriter.h"
29 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
30 #include "llvm/IR/DiagnosticPrinter.h"
31 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/IR/LegacyPassManager.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/IRReader/IRReader.h"
35 #include "llvm/Linker/Linker.h"
36 #include "llvm/MC/SubtargetFeature.h"
37 #include "llvm/Object/IRObjectFile.h"
38 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/CachePruning.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/SHA1.h"
44 #include "llvm/Support/SourceMgr.h"
45 #include "llvm/Support/TargetRegistry.h"
46 #include "llvm/Support/ThreadPool.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include "llvm/Transforms/IPO.h"
49 #include "llvm/Transforms/IPO/FunctionImport.h"
50 #include "llvm/Transforms/IPO/Internalize.h"
51 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
52 #include "llvm/Transforms/ObjCARC.h"
53 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "thinlto"
58 
59 namespace llvm {
60 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
61 extern cl::opt<bool> LTODiscardValueNames;
62 }
63 
64 namespace {
65 
66 static cl::opt<int> ThreadCount("threads",
67                                 cl::init(std::thread::hardware_concurrency()));
68 
69 static void diagnosticHandler(const DiagnosticInfo &DI) {
70   DiagnosticPrinterRawOStream DP(errs());
71   DI.print(DP);
72   errs() << '\n';
73 }
74 
75 // Simple helper to load a module from bitcode
76 static std::unique_ptr<Module>
77 loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
78                      bool Lazy) {
79   SMDiagnostic Err;
80   ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
81   if (Lazy) {
82     ModuleOrErr =
83         getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
84                              /* ShouldLazyLoadMetadata */ Lazy);
85   } else {
86     ModuleOrErr = parseBitcodeFile(Buffer, Context);
87   }
88   if (std::error_code EC = ModuleOrErr.getError()) {
89     Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
90                        EC.message());
91     Err.print("ThinLTO", errs());
92     report_fatal_error("Can't load module, abort.");
93   }
94   return std::move(ModuleOrErr.get());
95 }
96 
97 // Simple helper to save temporary files for debug.
98 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
99                             unsigned count, StringRef Suffix) {
100   if (TempDir.empty())
101     return;
102   // User asked to save temps, let dump the bitcode file after import.
103   auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
104   std::error_code EC;
105   raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
106   if (EC)
107     report_fatal_error(Twine("Failed to open ") + SaveTempPath +
108                        " to save optimized bitcode\n");
109   WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true);
110 }
111 
112 bool IsFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList,
113                                 const ModuleSummaryIndex &Index,
114                                 StringRef ModulePath) {
115   // Get the first *linker visible* definition for this global in the summary
116   // list.
117   auto FirstDefForLinker = llvm::find_if(
118       GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
119         auto Linkage = Summary->linkage();
120         return !GlobalValue::isAvailableExternallyLinkage(Linkage);
121       });
122   // If \p GV is not the first definition, give up...
123   if ((*FirstDefForLinker)->modulePath() != ModulePath)
124     return false;
125   // If there is any strong definition anywhere, do not bother emitting this.
126   if (llvm::any_of(
127           GVSummaryList,
128           [](const std::unique_ptr<GlobalValueSummary> &Summary) {
129             auto Linkage = Summary->linkage();
130             return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
131                    !GlobalValue::isWeakForLinker(Linkage);
132           }))
133     return false;
134   return true;
135 }
136 
137 static GlobalValue::LinkageTypes
138 ResolveODR(const ModuleSummaryIndex &Index,
139            const FunctionImporter::ExportSetTy &ExportList,
140            const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
141            StringRef ModuleIdentifier, GlobalValue::GUID GUID,
142            const GlobalValueSummary &GV) {
143   auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
144     return GVSummaryList.size() > 1;
145   };
146 
147   auto OriginalLinkage = GV.linkage();
148   switch (OriginalLinkage) {
149   case GlobalValue::ExternalLinkage:
150   case GlobalValue::AvailableExternallyLinkage:
151   case GlobalValue::AppendingLinkage:
152   case GlobalValue::InternalLinkage:
153   case GlobalValue::PrivateLinkage:
154   case GlobalValue::ExternalWeakLinkage:
155   case GlobalValue::CommonLinkage:
156   case GlobalValue::LinkOnceAnyLinkage:
157   case GlobalValue::WeakAnyLinkage:
158     break;
159   case GlobalValue::LinkOnceODRLinkage:
160   case GlobalValue::WeakODRLinkage: {
161     auto &GVSummaryList = Index.findGlobalValueSummaryList(GUID)->second;
162     // We need to emit only one of these, the first module will keep
163     // it, but turned into a weak while the others will drop it.
164     if (!HasMultipleCopies(GVSummaryList)) {
165       // Exported LinkonceODR needs to be promoted to not be discarded
166       if (GlobalValue::isDiscardableIfUnused(OriginalLinkage) &&
167           (ExportList.count(GUID) || GUIDPreservedSymbols.count(GUID)))
168         return GlobalValue::WeakODRLinkage;
169       break;
170     }
171     if (IsFirstDefinitionForLinker(GVSummaryList, Index, ModuleIdentifier))
172       return GlobalValue::WeakODRLinkage;
173     else if (isa<AliasSummary>(&GV))
174       // Alias can't be turned into available_externally.
175       return OriginalLinkage;
176     return GlobalValue::AvailableExternallyLinkage;
177   }
178   }
179   return OriginalLinkage;
180 }
181 
182 /// Resolve LinkOnceODR and WeakODR.
183 ///
184 /// We'd like to drop these function if they are no longer referenced in the
185 /// current module. However there is a chance that another module is still
186 /// referencing them because of the import. We make sure we always emit at least
187 /// one copy.
188 static void ResolveODR(
189     const ModuleSummaryIndex &Index,
190     const FunctionImporter::ExportSetTy &ExportList,
191     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
192     const GVSummaryMapTy &DefinedGlobals, StringRef ModuleIdentifier,
193     std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
194   if (Index.modulePaths().size() == 1)
195     // Nothing to do if we don't have multiple modules
196     return;
197 
198   // We won't optimize the globals that are referenced by an alias for now
199   // Ideally we should turn the alias into a global and duplicate the definition
200   // when needed.
201   DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
202   for (auto &GA : DefinedGlobals) {
203     if (auto AS = dyn_cast<AliasSummary>(GA.second))
204       GlobalInvolvedWithAlias.insert(&AS->getAliasee());
205   }
206 
207   for (auto &GV : DefinedGlobals) {
208     if (GlobalInvolvedWithAlias.count(GV.second))
209       continue;
210     auto NewLinkage =
211         ResolveODR(Index, ExportList, GUIDPreservedSymbols, ModuleIdentifier, GV.first, *GV.second);
212     if (NewLinkage != GV.second->linkage()) {
213       ResolvedODR[GV.first] = NewLinkage;
214     }
215   }
216 }
217 
218 /// Fixup linkage, see ResolveODR() above.
219 void fixupODR(
220     Module &TheModule,
221     const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR) {
222   // Process functions and global now
223   for (auto &GV : TheModule) {
224     auto NewLinkage = ResolvedODR.find(GV.getGUID());
225     if (NewLinkage == ResolvedODR.end())
226       continue;
227     DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
228                  << GV.getLinkage() << " to " << NewLinkage->second << "\n");
229     GV.setLinkage(NewLinkage->second);
230   }
231   for (auto &GV : TheModule.globals()) {
232     auto NewLinkage = ResolvedODR.find(GV.getGUID());
233     if (NewLinkage == ResolvedODR.end())
234       continue;
235     DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
236                  << GV.getLinkage() << " to " << NewLinkage->second << "\n");
237     GV.setLinkage(NewLinkage->second);
238   }
239   for (auto &GV : TheModule.aliases()) {
240     auto NewLinkage = ResolvedODR.find(GV.getGUID());
241     if (NewLinkage == ResolvedODR.end())
242       continue;
243     DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
244                  << GV.getLinkage() << " to " << NewLinkage->second << "\n");
245     GV.setLinkage(NewLinkage->second);
246   }
247 }
248 
249 static StringMap<MemoryBufferRef>
250 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
251   StringMap<MemoryBufferRef> ModuleMap;
252   for (auto &ModuleBuffer : Modules) {
253     assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
254                ModuleMap.end() &&
255            "Expect unique Buffer Identifier");
256     ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
257   }
258   return ModuleMap;
259 }
260 
261 /// Provide a "loader" for the FunctionImporter to access function from other
262 /// modules.
263 class ModuleLoader {
264   /// The context that will be used for importing.
265   LLVMContext &Context;
266 
267   /// Map from Module identifier to MemoryBuffer. Used by clients like the
268   /// FunctionImported to request loading a Module.
269   StringMap<MemoryBufferRef> &ModuleMap;
270 
271 public:
272   ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
273       : Context(Context), ModuleMap(ModuleMap) {}
274 
275   /// Load a module on demand.
276   std::unique_ptr<Module> operator()(StringRef Identifier) {
277     return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
278   }
279 };
280 
281 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
282   if (renameModuleForThinLTO(TheModule, Index))
283     report_fatal_error("renameModuleForThinLTO failed");
284 }
285 
286 static void
287 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
288                       StringMap<MemoryBufferRef> &ModuleMap,
289                       const FunctionImporter::ImportMapTy &ImportList) {
290   ModuleLoader Loader(TheModule.getContext(), ModuleMap);
291   FunctionImporter Importer(Index, Loader);
292   Importer.importFunctions(TheModule, ImportList);
293 }
294 
295 static void optimizeModule(Module &TheModule, TargetMachine &TM) {
296   // Populate the PassManager
297   PassManagerBuilder PMB;
298   PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
299   PMB.Inliner = createFunctionInliningPass();
300   // FIXME: should get it from the bitcode?
301   PMB.OptLevel = 3;
302   PMB.LoopVectorize = true;
303   PMB.SLPVectorize = true;
304   PMB.VerifyInput = true;
305   PMB.VerifyOutput = false;
306 
307   legacy::PassManager PM;
308 
309   // Add the TTI (required to inform the vectorizer about register size for
310   // instance)
311   PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
312 
313   // Add optimizations
314   PMB.populateThinLTOPassManager(PM);
315 
316   PM.run(TheModule);
317 }
318 
319 // Create a DenseSet of GlobalValue to be used with the Internalizer.
320 static DenseSet<const GlobalValue *> computePreservedSymbolsForModule(
321     Module &TheModule, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
322     const FunctionImporter::ExportSetTy &ExportList) {
323   DenseSet<const GlobalValue *> PreservedGV;
324   if (GUIDPreservedSymbols.empty())
325     // Early exit: internalize is disabled when there is nothing to preserve.
326     return PreservedGV;
327 
328   auto AddPreserveGV = [&](const GlobalValue &GV) {
329     auto GUID = GV.getGUID();
330     if (GUIDPreservedSymbols.count(GUID) || ExportList.count(GUID))
331       PreservedGV.insert(&GV);
332   };
333 
334   for (auto &GV : TheModule)
335     AddPreserveGV(GV);
336   for (auto &GV : TheModule.globals())
337     AddPreserveGV(GV);
338   for (auto &GV : TheModule.aliases())
339     AddPreserveGV(GV);
340 
341   return PreservedGV;
342 }
343 
344 // Run internalization on \p TheModule
345 static void
346 doInternalizeModule(Module &TheModule, const TargetMachine &TM,
347                     const DenseSet<const GlobalValue *> &PreservedGV) {
348   if (PreservedGV.empty()) {
349     // Be friendly and don't nuke totally the module when the client didn't
350     // supply anything to preserve.
351     return;
352   }
353 
354   // Parse inline ASM and collect the list of symbols that are not defined in
355   // the current module.
356   StringSet<> AsmUndefinedRefs;
357   object::IRObjectFile::CollectAsmUndefinedRefs(
358       Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
359       [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
360         if (Flags & object::BasicSymbolRef::SF_Undefined)
361           AsmUndefinedRefs.insert(Name);
362       });
363 
364   // Update the llvm.compiler_used globals to force preserving libcalls and
365   // symbols referenced from asm
366   UpdateCompilerUsed(TheModule, TM, AsmUndefinedRefs);
367 
368   // Declare a callback for the internalize pass that will ask for every
369   // candidate GlobalValue if it can be internalized or not.
370   auto MustPreserveGV =
371       [&](const GlobalValue &GV) -> bool { return PreservedGV.count(&GV); };
372 
373   llvm::internalizeModule(TheModule, MustPreserveGV);
374 }
375 
376 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
377 static DenseSet<GlobalValue::GUID>
378 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
379                             const Triple &TheTriple) {
380   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
381   for (auto &Entry : PreservedSymbols) {
382     StringRef Name = Entry.first();
383     if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
384       Name = Name.drop_front();
385     GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
386   }
387   return GUIDPreservedSymbols;
388 }
389 
390 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
391                                             TargetMachine &TM) {
392   SmallVector<char, 128> OutputBuffer;
393 
394   // CodeGen
395   {
396     raw_svector_ostream OS(OutputBuffer);
397     legacy::PassManager PM;
398 
399     // If the bitcode files contain ARC code and were compiled with optimization,
400     // the ObjCARCContractPass must be run, so do it unconditionally here.
401     PM.add(createObjCARCContractPass());
402 
403     // Setup the codegen now.
404     if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
405                                /* DisableVerify */ true))
406       report_fatal_error("Failed to setup codegen");
407 
408     // Run codegen now. resulting binary is in OutputBuffer.
409     PM.run(TheModule);
410   }
411   return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
412 }
413 
414 /// Manage caching for a single Module.
415 class ModuleCacheEntry {
416   SmallString<128> EntryPath;
417 
418 public:
419   // Create a cache entry. This compute a unique hash for the Module considering
420   // the current list of export/import, and offer an interface to query to
421   // access the content in the cache.
422   ModuleCacheEntry(
423       StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
424       const FunctionImporter::ImportMapTy &ImportList,
425       const FunctionImporter::ExportSetTy &ExportList,
426       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
427       const GVSummaryMapTy &DefinedFunctions,
428       const DenseSet<GlobalValue::GUID> &PreservedSymbols) {
429     if (CachePath.empty())
430       return;
431 
432     // Compute the unique hash for this entry
433     // This is based on the current compiler version, the module itself, the
434     // export list, the hash for every single module in the import list, the
435     // list of ResolvedODR for the module, and the list of preserved symbols.
436 
437     SHA1 Hasher;
438 
439     // Start with the compiler revision
440     Hasher.update(LLVM_VERSION_STRING);
441 #ifdef HAVE_LLVM_REVISION
442     Hasher.update(LLVM_REVISION);
443 #endif
444 
445     // Include the hash for the current module
446     auto ModHash = Index.getModuleHash(ModuleID);
447     Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
448     for (auto F : ExportList)
449       // The export list can impact the internalization, be conservative here
450       Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
451 
452     // Include the hash for every module we import functions from
453     for (auto &Entry : ImportList) {
454       auto ModHash = Index.getModuleHash(Entry.first());
455       Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
456     }
457 
458     // Include the hash for the resolved ODR.
459     for (auto &Entry : ResolvedODR) {
460       Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
461                                       sizeof(GlobalValue::GUID)));
462       Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
463                                       sizeof(GlobalValue::LinkageTypes)));
464     }
465 
466     // Include the hash for the preserved symbols.
467     for (auto &Entry : PreservedSymbols) {
468       if (DefinedFunctions.count(Entry))
469         Hasher.update(
470             ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
471     }
472 
473     sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
474   }
475 
476   // Access the path to this entry in the cache.
477   StringRef getEntryPath() { return EntryPath; }
478 
479   // Try loading the buffer for this cache entry.
480   ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
481     if (EntryPath.empty())
482       return std::error_code();
483     return MemoryBuffer::getFile(EntryPath);
484   }
485 
486   // Cache the Produced object file
487   void write(MemoryBufferRef OutputBuffer) {
488     if (EntryPath.empty())
489       return;
490 
491     // Write to a temporary to avoid race condition
492     SmallString<128> TempFilename;
493     int TempFD;
494     std::error_code EC =
495         sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
496     if (EC) {
497       errs() << "Error: " << EC.message() << "\n";
498       report_fatal_error("ThinLTO: Can't get a temporary file");
499     }
500     {
501       raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
502       OS << OutputBuffer.getBuffer();
503     }
504     // Rename to final destination (hopefully race condition won't matter here)
505     sys::fs::rename(TempFilename, EntryPath);
506   }
507 };
508 
509 static std::unique_ptr<MemoryBuffer> ProcessThinLTOModule(
510     Module &TheModule, const ModuleSummaryIndex &Index,
511     StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
512     const FunctionImporter::ImportMapTy &ImportList,
513     const FunctionImporter::ExportSetTy &ExportList,
514     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
515     std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
516     ThinLTOCodeGenerator::CachingOptions CacheOptions, bool DisableCodeGen,
517     StringRef SaveTempsDir, unsigned count) {
518 
519   // Save temps: after IPO.
520   saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc");
521 
522   // Prepare for internalization by computing the set of symbols to preserve.
523   // We need to compute the list of symbols to preserve during internalization
524   // before doing any promotion because after renaming we won't (easily) match
525   // to the original name.
526   auto PreservedGV = computePreservedSymbolsForModule(
527       TheModule, GUIDPreservedSymbols, ExportList);
528 
529   // "Benchmark"-like optimization: single-source case
530   bool SingleModule = (ModuleMap.size() == 1);
531 
532   if (!SingleModule) {
533     promoteModule(TheModule, Index);
534 
535     // Resolve the LinkOnce/Weak ODR, trying to turn them into
536     // "available_externally" when possible.
537     // This is a compile-time optimization.
538     fixupODR(TheModule, ResolvedODR);
539 
540     // Save temps: after promotion.
541     saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc");
542   }
543 
544   // Internalization
545   doInternalizeModule(TheModule, TM, PreservedGV);
546 
547   // Save internalized bitcode
548   saveTempBitcode(TheModule, SaveTempsDir, count, ".3.internalized.bc");
549 
550   if (!SingleModule) {
551     crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
552 
553     // Save temps: after cross-module import.
554     saveTempBitcode(TheModule, SaveTempsDir, count, ".4.imported.bc");
555   }
556 
557   optimizeModule(TheModule, TM);
558 
559   saveTempBitcode(TheModule, SaveTempsDir, count, ".5.opt.bc");
560 
561   if (DisableCodeGen) {
562     // Configured to stop before CodeGen, serialize the bitcode and return.
563     SmallVector<char, 128> OutputBuffer;
564     {
565       raw_svector_ostream OS(OutputBuffer);
566       ModuleSummaryIndexBuilder IndexBuilder(&TheModule);
567       WriteBitcodeToFile(&TheModule, OS, true, &IndexBuilder.getIndex());
568     }
569     return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
570   }
571 
572   return codegenModule(TheModule, TM);
573 }
574 
575 // Initialize the TargetMachine builder for a given Triple
576 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
577                           const Triple &TheTriple) {
578   // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
579   // FIXME this looks pretty terrible...
580   if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
581     if (TheTriple.getArch() == llvm::Triple::x86_64)
582       TMBuilder.MCpu = "core2";
583     else if (TheTriple.getArch() == llvm::Triple::x86)
584       TMBuilder.MCpu = "yonah";
585     else if (TheTriple.getArch() == llvm::Triple::aarch64)
586       TMBuilder.MCpu = "cyclone";
587   }
588   TMBuilder.TheTriple = std::move(TheTriple);
589 }
590 
591 } // end anonymous namespace
592 
593 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
594   MemoryBufferRef Buffer(Data, Identifier);
595   if (Modules.empty()) {
596     // First module added, so initialize the triple and some options
597     LLVMContext Context;
598     Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
599     initTMBuilder(TMBuilder, Triple(TheTriple));
600   }
601 #ifndef NDEBUG
602   else {
603     LLVMContext Context;
604     assert(TMBuilder.TheTriple.str() ==
605                getBitcodeTargetTriple(Buffer, Context) &&
606            "ThinLTO modules with different triple not supported");
607   }
608 #endif
609   Modules.push_back(Buffer);
610 }
611 
612 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
613   PreservedSymbols.insert(Name);
614 }
615 
616 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
617   // FIXME: At the moment, we don't take advantage of this extra information,
618   // we're conservatively considering cross-references as preserved.
619   //  CrossReferencedSymbols.insert(Name);
620   PreservedSymbols.insert(Name);
621 }
622 
623 // TargetMachine factory
624 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
625   std::string ErrMsg;
626   const Target *TheTarget =
627       TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
628   if (!TheTarget) {
629     report_fatal_error("Can't load target for this Triple: " + ErrMsg);
630   }
631 
632   // Use MAttr as the default set of features.
633   SubtargetFeatures Features(MAttr);
634   Features.getDefaultSubtargetFeatures(TheTriple);
635   std::string FeatureStr = Features.getString();
636   return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
637       TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
638       CodeModel::Default, CGOptLevel));
639 }
640 
641 /**
642  * Produce the combined summary index from all the bitcode files:
643  * "thin-link".
644  */
645 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
646   std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
647   uint64_t NextModuleId = 0;
648   for (auto &ModuleBuffer : Modules) {
649     ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
650         object::ModuleSummaryIndexObjectFile::create(ModuleBuffer,
651                                                      diagnosticHandler);
652     if (std::error_code EC = ObjOrErr.getError()) {
653       // FIXME diagnose
654       errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: "
655              << EC.message() << "\n";
656       return nullptr;
657     }
658     auto Index = (*ObjOrErr)->takeIndex();
659     if (CombinedIndex) {
660       CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
661     } else {
662       CombinedIndex = std::move(Index);
663     }
664   }
665   return CombinedIndex;
666 }
667 
668 /**
669  * Perform promotion and renaming of exported internal functions.
670  */
671 void ThinLTOCodeGenerator::promote(Module &TheModule,
672                                    ModuleSummaryIndex &Index) {
673   auto ModuleCount = Index.modulePaths().size();
674   auto ModuleIdentifier = TheModule.getModuleIdentifier();
675   // Collect for each module the list of function it defines (GUID -> Summary).
676   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
677   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
678 
679   // Generate import/export list
680   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
681   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
682   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
683                            ExportLists);
684   auto &ExportList = ExportLists[ModuleIdentifier];
685 
686   // Convert the preserved symbols set from string to GUID
687   auto GUIDPreservedSymbols =
688   computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
689 
690   // Resolve the LinkOnceODR, trying to turn them into "available_externally"
691   // where possible.
692   // This is a compile-time optimization.
693   // We use a std::map here to be able to have a defined ordering when
694   // producing a hash for the cache entry.
695   std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
696   ResolveODR(Index, ExportList, GUIDPreservedSymbols, ModuleToDefinedGVSummaries[ModuleIdentifier],
697              ModuleIdentifier, ResolvedODR);
698   fixupODR(TheModule, ResolvedODR);
699 
700   promoteModule(TheModule, Index);
701 }
702 
703 /**
704  * Perform cross-module importing for the module identified by ModuleIdentifier.
705  */
706 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
707                                              ModuleSummaryIndex &Index) {
708   auto ModuleMap = generateModuleMap(Modules);
709   auto ModuleCount = Index.modulePaths().size();
710 
711   // Collect for each module the list of function it defines (GUID -> Summary).
712   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
713   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
714 
715   // Generate import/export list
716   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
717   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
718   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
719                            ExportLists);
720   auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
721 
722   crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
723 }
724 
725 /**
726  * Perform internalization.
727  */
728 void ThinLTOCodeGenerator::internalize(Module &TheModule,
729                                        ModuleSummaryIndex &Index) {
730   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
731   auto ModuleCount = Index.modulePaths().size();
732   auto ModuleIdentifier = TheModule.getModuleIdentifier();
733 
734   // Convert the preserved symbols set from string to GUID
735   auto GUIDPreservedSymbols =
736       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
737 
738   // Collect for each module the list of function it defines (GUID -> Summary).
739   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
740   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
741 
742   // Generate import/export list
743   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
744   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
745   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
746                            ExportLists);
747   auto &ExportList = ExportLists[ModuleIdentifier];
748 
749   // Internalization
750   auto PreservedGV = computePreservedSymbolsForModule(
751       TheModule, GUIDPreservedSymbols, ExportList);
752   doInternalizeModule(TheModule, *TMBuilder.create(), PreservedGV);
753 }
754 
755 /**
756  * Perform post-importing ThinLTO optimizations.
757  */
758 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
759   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
760 
761   // Optimize now
762   optimizeModule(TheModule, *TMBuilder.create());
763 }
764 
765 /**
766  * Perform ThinLTO CodeGen.
767  */
768 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
769   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
770   return codegenModule(TheModule, *TMBuilder.create());
771 }
772 
773 // Main entry point for the ThinLTO processing
774 void ThinLTOCodeGenerator::run() {
775   if (CodeGenOnly) {
776     // Perform only parallel codegen and return.
777     ThreadPool Pool;
778     assert(ProducedBinaries.empty() && "The generator should not be reused");
779     ProducedBinaries.resize(Modules.size());
780     int count = 0;
781     for (auto &ModuleBuffer : Modules) {
782       Pool.async([&](int count) {
783         LLVMContext Context;
784         Context.setDiscardValueNames(LTODiscardValueNames);
785 
786         // Parse module now
787         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
788 
789         // CodeGen
790         ProducedBinaries[count] = codegen(*TheModule);
791       }, count++);
792     }
793 
794     return;
795   }
796 
797   // Sequential linking phase
798   auto Index = linkCombinedIndex();
799 
800   // Save temps: index.
801   if (!SaveTempsDir.empty()) {
802     auto SaveTempPath = SaveTempsDir + "index.bc";
803     std::error_code EC;
804     raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
805     if (EC)
806       report_fatal_error(Twine("Failed to open ") + SaveTempPath +
807                          " to save optimized bitcode\n");
808     WriteIndexToFile(*Index, OS);
809   }
810 
811   // Prepare the resulting object vector
812   assert(ProducedBinaries.empty() && "The generator should not be reused");
813   ProducedBinaries.resize(Modules.size());
814 
815   // Prepare the module map.
816   auto ModuleMap = generateModuleMap(Modules);
817   auto ModuleCount = Modules.size();
818 
819   // Collect for each module the list of function it defines (GUID -> Summary).
820   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
821   Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
822 
823   // Collect the import/export lists for all modules from the call-graph in the
824   // combined index.
825   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
826   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
827   ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
828                            ExportLists);
829 
830   // Convert the preserved symbols set from string to GUID, this is needed for
831   // computing the caching hash and the internalization.
832   auto GUIDPreservedSymbols =
833       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
834 
835   // Parallel optimizer + codegen
836   {
837     ThreadPool Pool(ThreadCount);
838     int count = 0;
839     for (auto &ModuleBuffer : Modules) {
840       Pool.async([&](int count) {
841         auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
842         auto &ExportList = ExportLists[ModuleIdentifier];
843 
844         auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
845 
846         // Resolve ODR, this has to be done early because it impacts the caching
847         // We use a std::map here to be able to have a defined ordering when
848         // producing a hash for the cache entry.
849         std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> ResolvedODR;
850         ResolveODR(*Index, ExportList, GUIDPreservedSymbols, DefinedFunctions, ModuleIdentifier,
851                    ResolvedODR);
852 
853         // The module may be cached, this helps handling it.
854         ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
855                                     ImportLists[ModuleIdentifier], ExportList,
856                                     ResolvedODR, DefinedFunctions,
857                                     GUIDPreservedSymbols);
858 
859         {
860           auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
861           DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
862                        << CacheEntry.getEntryPath() << "' for buffer " << count
863                        << " " << ModuleIdentifier << "\n");
864 
865           if (ErrOrBuffer) {
866             // Cache Hit!
867             ProducedBinaries[count] = std::move(ErrOrBuffer.get());
868             return;
869           }
870         }
871 
872         LLVMContext Context;
873         Context.setDiscardValueNames(LTODiscardValueNames);
874         Context.enableDebugTypeODRUniquing();
875 
876         // Parse module now
877         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
878 
879         // Save temps: original file.
880         saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
881 
882         auto &ImportList = ImportLists[ModuleIdentifier];
883         // Run the main process now, and generates a binary
884         auto OutputBuffer = ProcessThinLTOModule(
885             *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
886             ExportList, GUIDPreservedSymbols, ResolvedODR, CacheOptions,
887             DisableCodeGen, SaveTempsDir, count);
888 
889         CacheEntry.write(*OutputBuffer);
890         ProducedBinaries[count] = std::move(OutputBuffer);
891       }, count);
892       count++;
893     }
894   }
895 
896   CachePruning(CacheOptions.Path)
897       .setPruningInterval(CacheOptions.PruningInterval)
898       .setEntryExpiration(CacheOptions.Expiration)
899       .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
900       .prune();
901 
902   // If statistics were requested, print them out now.
903   if (llvm::AreStatisticsEnabled())
904     llvm::PrintStatistics();
905 }
906