1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements Function import based on summaries.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/FunctionImport.h"
15 
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/IR/AutoUpgrade.h"
20 #include "llvm/IR/DiagnosticPrinter.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/IRReader/IRReader.h"
24 #include "llvm/Linker/Linker.h"
25 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
30 
31 #define DEBUG_TYPE "function-import"
32 
33 using namespace llvm;
34 
35 STATISTIC(NumImported, "Number of functions imported");
36 
37 /// Limit on instruction count of imported functions.
38 static cl::opt<unsigned> ImportInstrLimit(
39     "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
40     cl::desc("Only import functions with less than N instructions"));
41 
42 static cl::opt<float>
43     ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
44                       cl::Hidden, cl::value_desc("x"),
45                       cl::desc("As we import functions, multiply the "
46                                "`import-instr-limit` threshold by this factor "
47                                "before processing newly imported functions"));
48 
49 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
50                                   cl::desc("Print imported functions"));
51 
52 // Load lazily a module from \p FileName in \p Context.
53 static std::unique_ptr<Module> loadFile(const std::string &FileName,
54                                         LLVMContext &Context) {
55   SMDiagnostic Err;
56   DEBUG(dbgs() << "Loading '" << FileName << "'\n");
57   // Metadata isn't loaded until functions are imported, to minimize
58   // the memory overhead.
59   std::unique_ptr<Module> Result =
60       getLazyIRFileModule(FileName, Err, Context,
61                           /* ShouldLazyLoadMetadata = */ true);
62   if (!Result) {
63     Err.print("function-import", errs());
64     report_fatal_error("Abort");
65   }
66 
67   return Result;
68 }
69 
70 namespace {
71 
72 /// Given a list of possible callee implementation for a call site, select one
73 /// that fits the \p Threshold.
74 ///
75 /// FIXME: select "best" instead of first that fits. But what is "best"?
76 /// - The smallest: more likely to be inlined.
77 /// - The one with the least outgoing edges (already well optimized).
78 /// - One from a module already being imported from in order to reduce the
79 ///   number of source modules parsed/linked.
80 /// - One that has PGO data attached.
81 /// - [insert you fancy metric here]
82 static const GlobalValueSummary *
83 selectCallee(const GlobalValueInfoList &CalleeInfoList, unsigned Threshold) {
84   auto It = llvm::find_if(
85       CalleeInfoList, [&](const std::unique_ptr<GlobalValueInfo> &GlobInfo) {
86         assert(GlobInfo->summary() &&
87                "We should not have a Global Info without summary");
88         auto *GVSummary = GlobInfo->summary();
89         if (auto *AS = dyn_cast<AliasSummary>(GVSummary))
90           GVSummary = &AS->getAliasee();
91         auto *Summary = cast<FunctionSummary>(GVSummary);
92 
93         if (GlobalValue::isWeakAnyLinkage(Summary->linkage()))
94           return false;
95 
96         if (Summary->instCount() > Threshold)
97           return false;
98 
99         return true;
100       });
101   if (It == CalleeInfoList.end())
102     return nullptr;
103 
104   return cast<GlobalValueSummary>((*It)->summary());
105 }
106 
107 /// Return the summary for the function \p GUID that fits the \p Threshold, or
108 /// null if there's no match.
109 static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
110                                               unsigned Threshold,
111                                               const ModuleSummaryIndex &Index) {
112   auto CalleeInfoList = Index.findGlobalValueInfoList(GUID);
113   if (CalleeInfoList == Index.end()) {
114     return nullptr; // This function does not have a summary
115   }
116   return selectCallee(CalleeInfoList->second, Threshold);
117 }
118 
119 /// Return true if the global \p GUID is exported by module \p ExportModulePath.
120 static bool isGlobalExported(const ModuleSummaryIndex &Index,
121                              StringRef ExportModulePath,
122                              GlobalValue::GUID GUID) {
123   auto CalleeInfoList = Index.findGlobalValueInfoList(GUID);
124   if (CalleeInfoList == Index.end())
125     // This global does not have a summary, it is not part of the ThinLTO
126     // process
127     return false;
128   auto DefinedInCalleeModule = llvm::find_if(
129       CalleeInfoList->second,
130       [&](const std::unique_ptr<GlobalValueInfo> &GlobInfo) {
131         auto *Summary = GlobInfo->summary();
132         assert(Summary && "Unexpected GlobalValueInfo without summary");
133         return Summary->modulePath() == ExportModulePath;
134       });
135   return (DefinedInCalleeModule != CalleeInfoList->second.end());
136 }
137 
138 using EdgeInfo = std::pair<const FunctionSummary *, unsigned /* Threshold */>;
139 
140 /// Compute the list of functions to import for a given caller. Mark these
141 /// imported functions and the symbols they reference in their source module as
142 /// exported from their source module.
143 static void computeImportForFunction(
144     const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
145     unsigned Threshold,
146     const std::map<GlobalValue::GUID, GlobalValueSummary *> &DefinedGVSummaries,
147     SmallVectorImpl<EdgeInfo> &Worklist,
148     FunctionImporter::ImportMapTy &ImportsForModule,
149     StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
150   for (auto &Edge : Summary.calls()) {
151     auto GUID = Edge.first.getGUID();
152     DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
153 
154     if (DefinedGVSummaries.count(GUID)) {
155       DEBUG(dbgs() << "ignored! Target already in destination module.\n");
156       continue;
157     }
158 
159     auto *CalleeSummary = selectCallee(GUID, Threshold, Index);
160     if (!CalleeSummary) {
161       DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
162       continue;
163     }
164     // "Resolve" the summary, traversing alias,
165     const FunctionSummary *ResolvedCalleeSummary;
166     if (isa<AliasSummary>(CalleeSummary))
167       ResolvedCalleeSummary = cast<FunctionSummary>(
168           &cast<AliasSummary>(CalleeSummary)->getAliasee());
169     else
170       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
171 
172     assert(ResolvedCalleeSummary->instCount() <= Threshold &&
173            "selectCallee() didn't honor the threshold");
174 
175     auto ExportModulePath = ResolvedCalleeSummary->modulePath();
176     auto &ProcessedThreshold = ImportsForModule[ExportModulePath][GUID];
177     /// Since the traversal of the call graph is DFS, we can revisit a function
178     /// a second time with a higher threshold. In this case, it is added back to
179     /// the worklist with the new threshold.
180     if (ProcessedThreshold && ProcessedThreshold > Threshold) {
181       DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
182                    << ProcessedThreshold << "\n");
183       continue;
184     }
185     // Mark this function as imported in this module, with the current Threshold
186     ProcessedThreshold = Threshold;
187 
188     // Make exports in the source module.
189     if (ExportLists) {
190       auto &ExportList = (*ExportLists)[ExportModulePath];
191       ExportList.insert(GUID);
192       // Mark all functions and globals referenced by this function as exported
193       // to the outside if they are defined in the same source module.
194       for (auto &Edge : ResolvedCalleeSummary->calls()) {
195         auto CalleeGUID = Edge.first.getGUID();
196         if (isGlobalExported(Index, ExportModulePath, CalleeGUID))
197           ExportList.insert(CalleeGUID);
198       }
199       for (auto &Ref : ResolvedCalleeSummary->refs()) {
200         auto GUID = Ref.getGUID();
201         if (isGlobalExported(Index, ExportModulePath, GUID))
202           ExportList.insert(GUID);
203       }
204     }
205 
206     // Insert the newly imported function to the worklist.
207     Worklist.push_back(std::make_pair(ResolvedCalleeSummary, Threshold));
208   }
209 }
210 
211 /// Given the list of globals defined in a module, compute the list of imports
212 /// as well as the list of "exports", i.e. the list of symbols referenced from
213 /// another module (that may require promotion).
214 static void ComputeImportForModule(
215     const std::map<GlobalValue::GUID, GlobalValueSummary *> &DefinedGVSummaries,
216     const ModuleSummaryIndex &Index,
217     FunctionImporter::ImportMapTy &ImportsForModule,
218     StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
219   // Worklist contains the list of function imported in this module, for which
220   // we will analyse the callees and may import further down the callgraph.
221   SmallVector<EdgeInfo, 128> Worklist;
222 
223   // Populate the worklist with the import for the functions in the current
224   // module
225   for (auto &GVInfo : DefinedGVSummaries) {
226     auto *Summary = GVInfo.second;
227     if (auto *AS = dyn_cast<AliasSummary>(Summary))
228       Summary = &AS->getAliasee();
229     auto *FuncSummary = dyn_cast<FunctionSummary>(Summary);
230     if (!FuncSummary)
231       // Skip import for global variables
232       continue;
233     DEBUG(dbgs() << "Initalize import for " << GVInfo.first << "\n");
234     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
235                              DefinedGVSummaries, Worklist, ImportsForModule,
236                              ExportLists);
237   }
238 
239   while (!Worklist.empty()) {
240     auto FuncInfo = Worklist.pop_back_val();
241     auto *Summary = FuncInfo.first;
242     auto Threshold = FuncInfo.second;
243 
244     // Process the newly imported functions and add callees to the worklist.
245     // Adjust the threshold
246     Threshold = Threshold * ImportInstrFactor;
247 
248     computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries,
249                              Worklist, ImportsForModule, ExportLists);
250   }
251 }
252 
253 } // anonymous namespace
254 
255 /// Compute all the import and export for every module using the Index.
256 void llvm::ComputeCrossModuleImport(
257     const ModuleSummaryIndex &Index,
258     const StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> &
259         ModuleToDefinedGVSummaries,
260     StringMap<FunctionImporter::ImportMapTy> &ImportLists,
261     StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
262   // For each module that has function defined, compute the import/export lists.
263   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
264     auto &ImportsForModule = ImportLists[DefinedGVSummaries.first()];
265     DEBUG(dbgs() << "Computing import for Module '"
266                  << DefinedGVSummaries.first() << "'\n");
267     ComputeImportForModule(DefinedGVSummaries.second, Index, ImportsForModule,
268                            &ExportLists);
269   }
270 
271 #ifndef NDEBUG
272   DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
273                << " modules:\n");
274   for (auto &ModuleImports : ImportLists) {
275     auto ModName = ModuleImports.first();
276     auto &Exports = ExportLists[ModName];
277     DEBUG(dbgs() << "* Module " << ModName << " exports " << Exports.size()
278                  << " functions. Imports from " << ModuleImports.second.size()
279                  << " modules.\n");
280     for (auto &Src : ModuleImports.second) {
281       auto SrcModName = Src.first();
282       DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
283                    << SrcModName << "\n");
284     }
285   }
286 #endif
287 }
288 
289 /// Compute all the imports for the given module in the Index.
290 void llvm::ComputeCrossModuleImportForModule(
291     StringRef ModulePath, const ModuleSummaryIndex &Index,
292     FunctionImporter::ImportMapTy &ImportList) {
293 
294   // Collect the list of functions this module defines.
295   // GUID -> Summary
296   std::map<GlobalValue::GUID, GlobalValueSummary *> FunctionInfoMap;
297   Index.collectDefinedFunctionsForModule(ModulePath, FunctionInfoMap);
298 
299   // Compute the import list for this module.
300   DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
301   ComputeImportForModule(FunctionInfoMap, Index, ImportList);
302 
303 #ifndef NDEBUG
304   DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
305                << ImportList.size() << " modules.\n");
306   for (auto &Src : ImportList) {
307     auto SrcModName = Src.first();
308     DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
309                  << SrcModName << "\n");
310   }
311 #endif
312 }
313 
314 // Automatically import functions in Module \p DestModule based on the summaries
315 // index.
316 //
317 bool FunctionImporter::importFunctions(
318     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
319   DEBUG(dbgs() << "Starting import for Module "
320                << DestModule.getModuleIdentifier() << "\n");
321   unsigned ImportedCount = 0;
322 
323   // Linker that will be used for importing function
324   Linker TheLinker(DestModule);
325   // Do the actual import of functions now, one Module at a time
326   std::set<StringRef> ModuleNameOrderedList;
327   for (auto &FunctionsToImportPerModule : ImportList) {
328     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first());
329   }
330   for (auto &Name : ModuleNameOrderedList) {
331     // Get the module for the import
332     const auto &FunctionsToImportPerModule = ImportList.find(Name);
333     assert(FunctionsToImportPerModule != ImportList.end());
334     std::unique_ptr<Module> SrcModule = ModuleLoader(Name);
335     assert(&DestModule.getContext() == &SrcModule->getContext() &&
336            "Context mismatch");
337 
338     // If modules were created with lazy metadata loading, materialize it
339     // now, before linking it (otherwise this will be a noop).
340     SrcModule->materializeMetadata();
341     UpgradeDebugInfo(*SrcModule);
342 
343     auto &ImportGUIDs = FunctionsToImportPerModule->second;
344     // Find the globals to import
345     DenseSet<const GlobalValue *> GlobalsToImport;
346     for (auto &GV : *SrcModule) {
347       if (!GV.hasName())
348         continue;
349       auto GUID = GV.getGUID();
350       auto Import = ImportGUIDs.count(GUID);
351       DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing " << GUID << " "
352                    << GV.getName() << " from " << SrcModule->getSourceFileName()
353                    << "\n");
354       if (Import) {
355         GV.materialize();
356         GlobalsToImport.insert(&GV);
357       }
358     }
359     for (auto &GV : SrcModule->globals()) {
360       if (!GV.hasName())
361         continue;
362       auto GUID = GV.getGUID();
363       auto Import = ImportGUIDs.count(GUID);
364       DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing " << GUID << " "
365                    << GV.getName() << " from " << SrcModule->getSourceFileName()
366                    << "\n");
367       if (Import) {
368         GV.materialize();
369         GlobalsToImport.insert(&GV);
370       }
371     }
372     for (auto &GV : SrcModule->aliases()) {
373       if (!GV.hasName())
374         continue;
375       auto GUID = GV.getGUID();
376       auto Import = ImportGUIDs.count(GUID);
377       DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing " << GUID << " "
378                    << GV.getName() << " from " << SrcModule->getSourceFileName()
379                    << "\n");
380       if (Import) {
381         // Alias can't point to "available_externally". However when we import
382         // linkOnceODR the linkage does not change. So we import the alias
383         // and aliasee only in this case.
384         GlobalObject *GO = GV.getBaseObject();
385         if (!GO->hasLinkOnceODRLinkage())
386           continue;
387 #ifndef NDEBUG
388         if (!GlobalsToImport.count(GO))
389           DEBUG(dbgs() << " alias triggers importing aliasee " << GO->getGUID()
390                        << " " << GO->getName() << " from "
391                        << SrcModule->getSourceFileName() << "\n");
392 #endif
393         GO->materialize();
394         GlobalsToImport.insert(GO);
395         GV.materialize();
396         GlobalsToImport.insert(&GV);
397       }
398     }
399 
400     // Link in the specified functions.
401     if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
402       return true;
403 
404     if (PrintImports) {
405       for (const auto *GV : GlobalsToImport)
406         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
407                << " from " << SrcModule->getSourceFileName() << "\n";
408     }
409 
410     if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
411                                &GlobalsToImport))
412       report_fatal_error("Function Import: link error");
413 
414     ImportedCount += GlobalsToImport.size();
415   }
416 
417   NumImported += ImportedCount;
418 
419   DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
420                << DestModule.getModuleIdentifier() << "\n");
421   return ImportedCount;
422 }
423 
424 /// Summary file to use for function importing when using -function-import from
425 /// the command line.
426 static cl::opt<std::string>
427     SummaryFile("summary-file",
428                 cl::desc("The summary file to use for function importing."));
429 
430 static void diagnosticHandler(const DiagnosticInfo &DI) {
431   raw_ostream &OS = errs();
432   DiagnosticPrinterRawOStream DP(OS);
433   DI.print(DP);
434   OS << '\n';
435 }
436 
437 /// Parse the summary index out of an IR file and return the summary
438 /// index object if found, or nullptr if not.
439 static std::unique_ptr<ModuleSummaryIndex>
440 getModuleSummaryIndexForFile(StringRef Path, std::string &Error,
441                              DiagnosticHandlerFunction DiagnosticHandler) {
442   std::unique_ptr<MemoryBuffer> Buffer;
443   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
444       MemoryBuffer::getFile(Path);
445   if (std::error_code EC = BufferOrErr.getError()) {
446     Error = EC.message();
447     return nullptr;
448   }
449   Buffer = std::move(BufferOrErr.get());
450   ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
451       object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(),
452                                                    DiagnosticHandler);
453   if (std::error_code EC = ObjOrErr.getError()) {
454     Error = EC.message();
455     return nullptr;
456   }
457   return (*ObjOrErr)->takeIndex();
458 }
459 
460 namespace {
461 /// Pass that performs cross-module function import provided a summary file.
462 class FunctionImportPass : public ModulePass {
463   /// Optional module summary index to use for importing, otherwise
464   /// the summary-file option must be specified.
465   const ModuleSummaryIndex *Index;
466 
467 public:
468   /// Pass identification, replacement for typeid
469   static char ID;
470 
471   /// Specify pass name for debug output
472   const char *getPassName() const override { return "Function Importing"; }
473 
474   explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr)
475       : ModulePass(ID), Index(Index) {}
476 
477   bool runOnModule(Module &M) override {
478     if (SummaryFile.empty() && !Index)
479       report_fatal_error("error: -function-import requires -summary-file or "
480                          "file from frontend\n");
481     std::unique_ptr<ModuleSummaryIndex> IndexPtr;
482     if (!SummaryFile.empty()) {
483       if (Index)
484         report_fatal_error("error: -summary-file and index from frontend\n");
485       std::string Error;
486       IndexPtr =
487           getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler);
488       if (!IndexPtr) {
489         errs() << "Error loading file '" << SummaryFile << "': " << Error
490                << "\n";
491         return false;
492       }
493       Index = IndexPtr.get();
494     }
495 
496     // First step is collecting the import list.
497     FunctionImporter::ImportMapTy ImportList;
498     ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
499                                       ImportList);
500 
501     // Next we need to promote to global scope and rename any local values that
502     // are potentially exported to other modules.
503     if (renameModuleForThinLTO(M, *Index, nullptr)) {
504       errs() << "Error renaming module\n";
505       return false;
506     }
507 
508     // Perform the import now.
509     auto ModuleLoader = [&M](StringRef Identifier) {
510       return loadFile(Identifier, M.getContext());
511     };
512     FunctionImporter Importer(*Index, ModuleLoader);
513     return Importer.importFunctions(M, ImportList);
514   }
515 };
516 } // anonymous namespace
517 
518 char FunctionImportPass::ID = 0;
519 INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
520                       "Summary Based Function Import", false, false)
521 INITIALIZE_PASS_END(FunctionImportPass, "function-import",
522                     "Summary Based Function Import", false, false)
523 
524 namespace llvm {
525 Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) {
526   return new FunctionImportPass(Index);
527 }
528 }
529