1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements Function import based on summaries.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/IPO/FunctionImport.h"
15 
16 #include "llvm/ADT/StringSet.h"
17 #include "llvm/IR/AutoUpgrade.h"
18 #include "llvm/IR/DiagnosticPrinter.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/IRReader/IRReader.h"
22 #include "llvm/Linker/Linker.h"
23 #include "llvm/Object/FunctionIndexObjectFile.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/SourceMgr.h"
27 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
28 
29 #include <map>
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "function-import"
34 
35 /// Limit on instruction count of imported functions.
36 static cl::opt<unsigned> ImportInstrLimit(
37     "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
38     cl::desc("Only import functions with less than N instructions"));
39 
40 static cl::opt<float>
41     ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
42                       cl::Hidden, cl::value_desc("x"),
43                       cl::desc("As we import functions, multiply the "
44                                "`import-instr-limit` threshold by this factor "
45                                "before processing newly imported functions"));
46 
47 // Load lazily a module from \p FileName in \p Context.
48 static std::unique_ptr<Module> loadFile(const std::string &FileName,
49                                         LLVMContext &Context) {
50   SMDiagnostic Err;
51   DEBUG(dbgs() << "Loading '" << FileName << "'\n");
52   // Metadata isn't loaded until functions are imported, to minimize
53   // the memory overhead.
54   std::unique_ptr<Module> Result =
55       getLazyIRFileModule(FileName, Err, Context,
56                           /* ShouldLazyLoadMetadata = */ true);
57   if (!Result) {
58     Err.print("function-import", errs());
59     return nullptr;
60   }
61 
62   return Result;
63 }
64 
65 namespace {
66 
67 /// Track functions already seen using a map that record the current
68 /// Threshold and the importing decision. Since the traversal of the call graph
69 /// is DFS, we can revisit a function a second time with a higher threshold. In
70 /// this case and if the function was not imported the first time, it is added
71 /// back to the worklist with the new threshold
72 using VisitedFunctionTrackerTy = StringMap<std::pair<unsigned, bool>>;
73 
74 /// Helper to load on demand a Module from file and cache it for subsequent
75 /// queries. It can be used with the FunctionImporter.
76 class ModuleLazyLoaderCache {
77   /// Cache of lazily loaded module for import.
78   StringMap<std::unique_ptr<Module>> ModuleMap;
79 
80   /// Retrieve a Module from the cache or lazily load it on demand.
81   std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
82 
83 public:
84   /// Create the loader, Module will be initialized in \p Context.
85   ModuleLazyLoaderCache(std::function<
86       std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
87       : createLazyModule(createLazyModule) {}
88 
89   /// Retrieve a Module from the cache or lazily load it on demand.
90   Module &operator()(StringRef FileName);
91 
92   std::unique_ptr<Module> takeModule(StringRef FileName) {
93     auto I = ModuleMap.find(FileName);
94     assert(I != ModuleMap.end());
95     std::unique_ptr<Module> Ret = std::move(I->second);
96     ModuleMap.erase(I);
97     return Ret;
98   }
99 };
100 
101 // Get a Module for \p FileName from the cache, or load it lazily.
102 Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
103   auto &Module = ModuleMap[Identifier];
104   if (!Module)
105     Module = createLazyModule(Identifier);
106   return *Module;
107 }
108 } // anonymous namespace
109 
110 /// Walk through the instructions in \p F looking for external
111 /// calls not already in the \p VisitedFunctions map. If any are
112 /// found they are added to the \p Worklist for importing.
113 static void findExternalCalls(
114     const Module &DestModule, Function &F, const FunctionInfoIndex &Index,
115     VisitedFunctionTrackerTy &VisitedFunctions, unsigned Threshold,
116     SmallVectorImpl<std::pair<StringRef, unsigned>> &Worklist) {
117   // We need to suffix internal function calls imported from other modules,
118   // prepare the suffix ahead of time.
119   std::string Suffix;
120   if (F.getParent() != &DestModule)
121     Suffix =
122         (Twine(".llvm.") +
123          Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
124 
125   for (auto &BB : F) {
126     for (auto &I : BB) {
127       if (isa<CallInst>(I)) {
128         auto CalledFunction = cast<CallInst>(I).getCalledFunction();
129         // Insert any new external calls that have not already been
130         // added to set/worklist.
131         if (!CalledFunction || !CalledFunction->hasName())
132           continue;
133         // Ignore intrinsics early
134         if (CalledFunction->isIntrinsic()) {
135           assert(CalledFunction->getIntrinsicID() != 0);
136           continue;
137         }
138         auto ImportedName = CalledFunction->getName();
139         auto Renamed = (ImportedName + Suffix).str();
140         // Rename internal functions
141         if (CalledFunction->hasInternalLinkage()) {
142           ImportedName = Renamed;
143         }
144         // Compute the global identifier used in the function index.
145         auto CalledFunctionGlobalID = Function::getGlobalIdentifier(
146             CalledFunction->getName(), CalledFunction->getLinkage(),
147             CalledFunction->getParent()->getSourceFileName());
148 
149         auto CalledFunctionInfo = std::make_pair(Threshold, false);
150         auto It = VisitedFunctions.insert(
151             std::make_pair(CalledFunctionGlobalID, CalledFunctionInfo));
152         if (!It.second) {
153           // This is a call to a function we already considered, if the function
154           // has been imported the first time, or if the current threshold is
155           // not higher, skip it.
156           auto &FunctionInfo = It.first->second;
157           if (FunctionInfo.second || FunctionInfo.first >= Threshold)
158             continue;
159           It.first->second = CalledFunctionInfo;
160         }
161         // Ignore functions already present in the destination module
162         auto *SrcGV = DestModule.getNamedValue(ImportedName);
163         if (SrcGV) {
164           if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
165             SrcGV = SGA->getBaseObject();
166           assert(isa<Function>(SrcGV) && "Name collision during import");
167           if (!cast<Function>(SrcGV)->isDeclaration()) {
168             DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
169                          << ImportedName << " already in DestinationModule\n");
170             continue;
171           }
172         }
173 
174         Worklist.push_back(std::make_pair(It.first->getKey(), Threshold));
175         DEBUG(dbgs() << DestModule.getModuleIdentifier()
176                      << ": Adding callee for : " << ImportedName << " : "
177                      << F.getName() << "\n");
178       }
179     }
180   }
181 }
182 
183 // Helper function: given a worklist and an index, will process all the worklist
184 // and decide what to import based on the summary information.
185 //
186 // Nothing is actually imported, functions are materialized in their source
187 // module and analyzed there.
188 //
189 // \p ModuleToFunctionsToImportMap is filled with the set of Function to import
190 // per Module.
191 static void
192 GetImportList(Module &DestModule,
193               SmallVectorImpl<std::pair<StringRef, unsigned>> &Worklist,
194               VisitedFunctionTrackerTy &VisitedFunctions,
195               std::map<StringRef, DenseSet<const GlobalValue *>> &
196                   ModuleToFunctionsToImportMap,
197               const FunctionInfoIndex &Index,
198               ModuleLazyLoaderCache &ModuleLoaderCache) {
199   while (!Worklist.empty()) {
200     StringRef CalledFunctionName;
201     unsigned Threshold;
202     std::tie(CalledFunctionName, Threshold) = Worklist.pop_back_val();
203     DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
204                  << CalledFunctionName << " with Threshold " << Threshold
205                  << "\n");
206 
207     // Try to get a summary for this function call.
208     auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
209     if (InfoList == Index.end()) {
210       DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
211                    << CalledFunctionName << " Ignoring.\n");
212       continue;
213     }
214     assert(!InfoList->second.empty() && "No summary, error at import?");
215 
216     // Comdat can have multiple entries, FIXME: what do we do with them?
217     auto &Info = InfoList->second[0];
218     assert(Info && "Nullptr in list, error importing summaries?\n");
219 
220     auto *Summary = Info->functionSummary();
221     if (!Summary) {
222       // FIXME: in case we are lazyloading summaries, we can do it now.
223       DEBUG(dbgs() << DestModule.getModuleIdentifier()
224                    << ": Missing summary for  " << CalledFunctionName
225                    << ", error at import?\n");
226       llvm_unreachable("Missing summary");
227     }
228 
229     if (Summary->instCount() > Threshold) {
230       DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
231                    << CalledFunctionName << " with " << Summary->instCount()
232                    << " instructions (limit " << Threshold << ")\n");
233       continue;
234     }
235 
236     // Mark the function as imported in the VisitedFunctions tracker
237     assert(VisitedFunctions.count(CalledFunctionName));
238     VisitedFunctions[CalledFunctionName].second = true;
239 
240     // Get the module path from the summary.
241     auto ModuleIdentifier = Summary->modulePath();
242     DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
243                  << CalledFunctionName << " from " << ModuleIdentifier << "\n");
244 
245     auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
246 
247     // The function that we will import!
248     GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
249 
250     if (!SGV) {
251       // The function is referenced by a global identifier, which has the
252       // source file name prepended for functions that were originally local
253       // in the source module. Strip any prepended name to recover the original
254       // name in the source module.
255       std::pair<StringRef, StringRef> Split = CalledFunctionName.rsplit(':');
256       SGV = SrcModule.getNamedValue(Split.second);
257       assert(SGV && "Can't find function to import in source module");
258     }
259     if (!SGV) {
260       report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
261                          "' in Module '" + SrcModule.getModuleIdentifier() +
262                          "', error in the summary?\n");
263     }
264 
265     Function *F = dyn_cast<Function>(SGV);
266     if (!F && isa<GlobalAlias>(SGV)) {
267       auto *SGA = dyn_cast<GlobalAlias>(SGV);
268       F = dyn_cast<Function>(SGA->getBaseObject());
269       CalledFunctionName = F->getName();
270     }
271     assert(F && "Imported Function is ... not a Function");
272 
273     // We cannot import weak_any functions/aliases without possibly affecting
274     // the order they are seen and selected by the linker, changing program
275     // semantics.
276     if (SGV->hasWeakAnyLinkage()) {
277       DEBUG(dbgs() << DestModule.getModuleIdentifier()
278                    << ": Ignoring import request for weak-any "
279                    << (isa<Function>(SGV) ? "function " : "alias ")
280                    << CalledFunctionName << " from "
281                    << SrcModule.getModuleIdentifier() << "\n");
282       continue;
283     }
284 
285     // Add the function to the import list
286     auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
287     Entry.insert(F);
288 
289     // Process the newly imported functions and add callees to the worklist.
290     // Adjust the threshold
291     Threshold = Threshold * ImportInstrFactor;
292     F->materialize();
293     findExternalCalls(DestModule, *F, Index, VisitedFunctions, Threshold,
294                       Worklist);
295   }
296 }
297 
298 // Automatically import functions in Module \p DestModule based on the summaries
299 // index.
300 //
301 // The current implementation imports every called functions that exists in the
302 // summaries index.
303 bool FunctionImporter::importFunctions(Module &DestModule) {
304   DEBUG(dbgs() << "Starting import for Module "
305                << DestModule.getModuleIdentifier() << "\n");
306   unsigned ImportedCount = 0;
307 
308   // First step is collecting the called external functions.
309   // We keep the function name as well as the import threshold for its callees.
310   VisitedFunctionTrackerTy VisitedFunctions;
311   SmallVector<std::pair<StringRef, unsigned>, 64> Worklist;
312   for (auto &F : DestModule) {
313     if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
314       continue;
315     findExternalCalls(DestModule, F, Index, VisitedFunctions, ImportInstrLimit,
316                       Worklist);
317   }
318   if (Worklist.empty())
319     return false;
320 
321   /// Second step: for every call to an external function, try to import it.
322 
323   // Linker that will be used for importing function
324   Linker TheLinker(DestModule);
325 
326   // Map of Module -> List of Function to import from the Module
327   std::map<StringRef, DenseSet<const GlobalValue *>>
328       ModuleToFunctionsToImportMap;
329 
330   // Analyze the summaries and get the list of functions to import by
331   // populating ModuleToFunctionsToImportMap
332   ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
333   GetImportList(DestModule, Worklist, VisitedFunctions,
334                 ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
335   assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
336 
337   // Do the actual import of functions now, one Module at a time
338   for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
339     // Get the module for the import
340     auto &FunctionsToImport = FunctionsToImportPerModule.second;
341     std::unique_ptr<Module> SrcModule =
342         ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
343     assert(&DestModule.getContext() == &SrcModule->getContext() &&
344            "Context mismatch");
345 
346     // If modules were created with lazy metadata loading, materialize it
347     // now, before linking it (otherwise this will be a noop).
348     SrcModule->materializeMetadata();
349     UpgradeDebugInfo(*SrcModule);
350 
351     // Link in the specified functions.
352     if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
353                                &Index, &FunctionsToImport))
354       report_fatal_error("Function Import: link error");
355 
356     ImportedCount += FunctionsToImport.size();
357   }
358 
359   DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
360                << DestModule.getModuleIdentifier() << "\n");
361   return ImportedCount;
362 }
363 
364 /// Summary file to use for function importing when using -function-import from
365 /// the command line.
366 static cl::opt<std::string>
367     SummaryFile("summary-file",
368                 cl::desc("The summary file to use for function importing."));
369 
370 static void diagnosticHandler(const DiagnosticInfo &DI) {
371   raw_ostream &OS = errs();
372   DiagnosticPrinterRawOStream DP(OS);
373   DI.print(DP);
374   OS << '\n';
375 }
376 
377 /// Parse the function index out of an IR file and return the function
378 /// index object if found, or nullptr if not.
379 static std::unique_ptr<FunctionInfoIndex>
380 getFunctionIndexForFile(StringRef Path, std::string &Error,
381                         DiagnosticHandlerFunction DiagnosticHandler) {
382   std::unique_ptr<MemoryBuffer> Buffer;
383   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
384       MemoryBuffer::getFile(Path);
385   if (std::error_code EC = BufferOrErr.getError()) {
386     Error = EC.message();
387     return nullptr;
388   }
389   Buffer = std::move(BufferOrErr.get());
390   ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
391       object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
392                                               DiagnosticHandler);
393   if (std::error_code EC = ObjOrErr.getError()) {
394     Error = EC.message();
395     return nullptr;
396   }
397   return (*ObjOrErr)->takeIndex();
398 }
399 
400 namespace {
401 /// Pass that performs cross-module function import provided a summary file.
402 class FunctionImportPass : public ModulePass {
403   /// Optional function summary index to use for importing, otherwise
404   /// the summary-file option must be specified.
405   const FunctionInfoIndex *Index;
406 
407 public:
408   /// Pass identification, replacement for typeid
409   static char ID;
410 
411   /// Specify pass name for debug output
412   const char *getPassName() const override {
413     return "Function Importing";
414   }
415 
416   explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
417       : ModulePass(ID), Index(Index) {}
418 
419   bool runOnModule(Module &M) override {
420     if (SummaryFile.empty() && !Index)
421       report_fatal_error("error: -function-import requires -summary-file or "
422                          "file from frontend\n");
423     std::unique_ptr<FunctionInfoIndex> IndexPtr;
424     if (!SummaryFile.empty()) {
425       if (Index)
426         report_fatal_error("error: -summary-file and index from frontend\n");
427       std::string Error;
428       IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
429       if (!IndexPtr) {
430         errs() << "Error loading file '" << SummaryFile << "': " << Error
431                << "\n";
432         return false;
433       }
434       Index = IndexPtr.get();
435     }
436 
437     // First we need to promote to global scope and rename any local values that
438     // are potentially exported to other modules.
439     if (renameModuleForThinLTO(M, Index)) {
440       errs() << "Error renaming module\n";
441       return false;
442     }
443 
444     // Perform the import now.
445     auto ModuleLoader = [&M](StringRef Identifier) {
446       return loadFile(Identifier, M.getContext());
447     };
448     FunctionImporter Importer(*Index, ModuleLoader);
449     return Importer.importFunctions(M);
450   }
451 };
452 } // anonymous namespace
453 
454 char FunctionImportPass::ID = 0;
455 INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
456                       "Summary Based Function Import", false, false)
457 INITIALIZE_PASS_END(FunctionImportPass, "function-import",
458                     "Summary Based Function Import", false, false)
459 
460 namespace llvm {
461 Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
462   return new FunctionImportPass(Index);
463 }
464 }
465