//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements Function import based on summaries.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/IRMover.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
#include <memory>
#include <set>
#include <string> 55 #include <system_error> 56 #include <tuple> 57 #include <utility> 58 59 using namespace llvm; 60 61 #define DEBUG_TYPE "function-import" 62 63 STATISTIC(NumImportedFunctions, "Number of functions imported"); 64 STATISTIC(NumImportedGlobalVars, "Number of global variables imported"); 65 STATISTIC(NumImportedModules, "Number of modules imported from"); 66 STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index"); 67 STATISTIC(NumLiveSymbols, "Number of live symbols in index"); 68 69 /// Limit on instruction count of imported functions. 70 static cl::opt<unsigned> ImportInstrLimit( 71 "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), 72 cl::desc("Only import functions with less than N instructions")); 73 74 static cl::opt<int> ImportCutoff( 75 "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"), 76 cl::desc("Only import first N functions if N>=0 (default -1)")); 77 78 static cl::opt<float> 79 ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7), 80 cl::Hidden, cl::value_desc("x"), 81 cl::desc("As we import functions, multiply the " 82 "`import-instr-limit` threshold by this factor " 83 "before processing newly imported functions")); 84 85 static cl::opt<float> ImportHotInstrFactor( 86 "import-hot-evolution-factor", cl::init(1.0), cl::Hidden, 87 cl::value_desc("x"), 88 cl::desc("As we import functions called from hot callsite, multiply the " 89 "`import-instr-limit` threshold by this factor " 90 "before processing newly imported functions")); 91 92 static cl::opt<float> ImportHotMultiplier( 93 "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"), 94 cl::desc("Multiply the `import-instr-limit` threshold for hot callsites")); 95 96 static cl::opt<float> ImportCriticalMultiplier( 97 "import-critical-multiplier", cl::init(100.0), cl::Hidden, 98 cl::value_desc("x"), 99 cl::desc( 100 "Multiply the `import-instr-limit` threshold for critical callsites")); 101 102 // FIXME: This 
multiplier was not really tuned up. 103 static cl::opt<float> ImportColdMultiplier( 104 "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"), 105 cl::desc("Multiply the `import-instr-limit` threshold for cold callsites")); 106 107 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden, 108 cl::desc("Print imported functions")); 109 110 static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden, 111 cl::desc("Compute dead symbols")); 112 113 static cl::opt<bool> EnableImportMetadata( 114 "enable-import-metadata", cl::init( 115 #if !defined(NDEBUG) 116 true /*Enabled with asserts.*/ 117 #else 118 false 119 #endif 120 ), 121 cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'")); 122 123 /// Summary file to use for function importing when using -function-import from 124 /// the command line. 125 static cl::opt<std::string> 126 SummaryFile("summary-file", 127 cl::desc("The summary file to use for function importing.")); 128 129 /// Used when testing importing from distributed indexes via opt 130 // -function-import. 131 static cl::opt<bool> 132 ImportAllIndex("import-all-index", 133 cl::desc("Import all external functions in index.")); 134 135 // Load lazily a module from \p FileName in \p Context. 136 static std::unique_ptr<Module> loadFile(const std::string &FileName, 137 LLVMContext &Context) { 138 SMDiagnostic Err; 139 LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n"); 140 // Metadata isn't loaded until functions are imported, to minimize 141 // the memory overhead. 142 std::unique_ptr<Module> Result = 143 getLazyIRFileModule(FileName, Err, Context, 144 /* ShouldLazyLoadMetadata = */ true); 145 if (!Result) { 146 Err.print("function-import", errs()); 147 report_fatal_error("Abort"); 148 } 149 150 return Result; 151 } 152 153 /// Given a list of possible callee implementation for a call site, select one 154 /// that fits the \p Threshold. 
155 /// 156 /// FIXME: select "best" instead of first that fits. But what is "best"? 157 /// - The smallest: more likely to be inlined. 158 /// - The one with the least outgoing edges (already well optimized). 159 /// - One from a module already being imported from in order to reduce the 160 /// number of source modules parsed/linked. 161 /// - One that has PGO data attached. 162 /// - [insert you fancy metric here] 163 static const GlobalValueSummary * 164 selectCallee(const ModuleSummaryIndex &Index, 165 ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList, 166 unsigned Threshold, StringRef CallerModulePath) { 167 auto It = llvm::find_if( 168 CalleeSummaryList, 169 [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) { 170 auto *GVSummary = SummaryPtr.get(); 171 if (!Index.isGlobalValueLive(GVSummary)) 172 return false; 173 174 // For SamplePGO, in computeImportForFunction the OriginalId 175 // may have been used to locate the callee summary list (See 176 // comment there). 177 // The mapping from OriginalId to GUID may return a GUID 178 // that corresponds to a static variable. Filter it out here. 179 // This can happen when 180 // 1) There is a call to a library function which is not defined 181 // in the index. 182 // 2) There is a static variable with the OriginalGUID identical 183 // to the GUID of the library function in 1); 184 // When this happens, the logic for SamplePGO kicks in and 185 // the static variable in 2) will be found, which needs to be 186 // filtered out. 187 if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) 188 return false; 189 if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) 190 // There is no point in importing these, we can't inline them 191 return false; 192 193 auto *Summary = cast<FunctionSummary>(GVSummary->getBaseObject()); 194 195 // If this is a local function, make sure we import the copy 196 // in the caller's module. 
The only time a local function can 197 // share an entry in the index is if there is a local with the same name 198 // in another module that had the same source file name (in a different 199 // directory), where each was compiled in their own directory so there 200 // was not distinguishing path. 201 // However, do the import from another module if there is only one 202 // entry in the list - in that case this must be a reference due 203 // to indirect call profile data, since a function pointer can point to 204 // a local in another module. 205 if (GlobalValue::isLocalLinkage(Summary->linkage()) && 206 CalleeSummaryList.size() > 1 && 207 Summary->modulePath() != CallerModulePath) 208 return false; 209 210 if (Summary->instCount() > Threshold) 211 return false; 212 213 if (Summary->notEligibleToImport()) 214 return false; 215 216 return true; 217 }); 218 if (It == CalleeSummaryList.end()) 219 return nullptr; 220 221 return cast<GlobalValueSummary>(It->get()); 222 } 223 224 namespace { 225 226 using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */, 227 GlobalValue::GUID>; 228 229 } // anonymous namespace 230 231 static ValueInfo 232 updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) { 233 if (!VI.getSummaryList().empty()) 234 return VI; 235 // For SamplePGO, the indirect call targets for local functions will 236 // have its original name annotated in profile. We try to find the 237 // corresponding PGOFuncName as the GUID. 238 // FIXME: Consider updating the edges in the graph after building 239 // it, rather than needing to perform this mapping on each walk. 
240 auto GUID = Index.getGUIDFromOriginalID(VI.getGUID()); 241 if (GUID == 0) 242 return ValueInfo(); 243 return Index.getValueInfo(GUID); 244 } 245 246 static void computeImportForReferencedGlobals( 247 const FunctionSummary &Summary, const GVSummaryMapTy &DefinedGVSummaries, 248 FunctionImporter::ImportMapTy &ImportList, 249 StringMap<FunctionImporter::ExportSetTy> *ExportLists) { 250 for (auto &VI : Summary.refs()) { 251 if (DefinedGVSummaries.count(VI.getGUID())) { 252 LLVM_DEBUG( 253 dbgs() << "Ref ignored! Target already in destination module.\n"); 254 continue; 255 } 256 257 LLVM_DEBUG(dbgs() << " ref -> " << VI.getGUID() << "\n"); 258 259 for (auto &RefSummary : VI.getSummaryList()) 260 if (RefSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind && 261 // Don't try to import regular LTO summaries added to dummy module. 262 !RefSummary->modulePath().empty() && 263 !GlobalValue::isInterposableLinkage(RefSummary->linkage()) && 264 RefSummary->refs().empty()) { 265 ImportList[RefSummary->modulePath()][VI.getGUID()] = 1; 266 if (ExportLists) 267 (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID()); 268 break; 269 } 270 } 271 } 272 273 /// Compute the list of functions to import for a given caller. Mark these 274 /// imported functions and the symbols they reference in their source module as 275 /// exported from their source module. 
276 static void computeImportForFunction( 277 const FunctionSummary &Summary, const ModuleSummaryIndex &Index, 278 const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries, 279 SmallVectorImpl<EdgeInfo> &Worklist, 280 FunctionImporter::ImportMapTy &ImportList, 281 StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { 282 computeImportForReferencedGlobals(Summary, DefinedGVSummaries, ImportList, 283 ExportLists); 284 static int ImportCount = 0; 285 for (auto &Edge : Summary.calls()) { 286 ValueInfo VI = Edge.first; 287 LLVM_DEBUG(dbgs() << " edge -> " << VI.getGUID() 288 << " Threshold:" << Threshold << "\n"); 289 290 if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) { 291 LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff 292 << " reached.\n"); 293 continue; 294 } 295 296 VI = updateValueInfoForIndirectCalls(Index, VI); 297 if (!VI) 298 continue; 299 300 if (DefinedGVSummaries.count(VI.getGUID())) { 301 LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n"); 302 continue; 303 } 304 305 auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float { 306 if (Hotness == CalleeInfo::HotnessType::Hot) 307 return ImportHotMultiplier; 308 if (Hotness == CalleeInfo::HotnessType::Cold) 309 return ImportColdMultiplier; 310 if (Hotness == CalleeInfo::HotnessType::Critical) 311 return ImportCriticalMultiplier; 312 return 1.0; 313 }; 314 315 const auto NewThreshold = 316 Threshold * GetBonusMultiplier(Edge.second.getHotness()); 317 318 auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, 319 Summary.modulePath()); 320 if (!CalleeSummary) { 321 LLVM_DEBUG( 322 dbgs() << "ignored! 
No qualifying callee with summary found.\n"); 323 continue; 324 } 325 326 // "Resolve" the summary 327 const auto *ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary->getBaseObject()); 328 329 assert(ResolvedCalleeSummary->instCount() <= NewThreshold && 330 "selectCallee() didn't honor the threshold"); 331 332 auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) { 333 // Adjust the threshold for next level of imported functions. 334 // The threshold is different for hot callsites because we can then 335 // inline chains of hot calls. 336 if (IsHotCallsite) 337 return Threshold * ImportHotInstrFactor; 338 return Threshold * ImportInstrFactor; 339 }; 340 341 bool IsHotCallsite = 342 Edge.second.getHotness() == CalleeInfo::HotnessType::Hot; 343 const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite); 344 345 auto ExportModulePath = ResolvedCalleeSummary->modulePath(); 346 auto &ProcessedThreshold = ImportList[ExportModulePath][VI.getGUID()]; 347 /// Since the traversal of the call graph is DFS, we can revisit a function 348 /// a second time with a higher threshold. In this case, it is added back to 349 /// the worklist with the new threshold. 350 if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) { 351 LLVM_DEBUG(dbgs() << "ignored! Target was already seen with Threshold " 352 << ProcessedThreshold << "\n"); 353 continue; 354 } 355 bool PreviouslyImported = ProcessedThreshold != 0; 356 // Mark this function as imported in this module, with the current Threshold 357 ProcessedThreshold = AdjThreshold; 358 359 ImportCount++; 360 361 // Make exports in the source module. 
362 if (ExportLists) { 363 auto &ExportList = (*ExportLists)[ExportModulePath]; 364 ExportList.insert(VI.getGUID()); 365 if (!PreviouslyImported) { 366 // This is the first time this function was exported from its source 367 // module, so mark all functions and globals it references as exported 368 // to the outside if they are defined in the same source module. 369 // For efficiency, we unconditionally add all the referenced GUIDs 370 // to the ExportList for this module, and will prune out any not 371 // defined in the module later in a single pass. 372 for (auto &Edge : ResolvedCalleeSummary->calls()) { 373 auto CalleeGUID = Edge.first.getGUID(); 374 ExportList.insert(CalleeGUID); 375 } 376 for (auto &Ref : ResolvedCalleeSummary->refs()) { 377 auto GUID = Ref.getGUID(); 378 ExportList.insert(GUID); 379 } 380 } 381 } 382 383 // Insert the newly imported function to the worklist. 384 Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID()); 385 } 386 } 387 388 /// Given the list of globals defined in a module, compute the list of imports 389 /// as well as the list of "exports", i.e. the list of symbols referenced from 390 /// another module (that may require promotion). 391 static void ComputeImportForModule( 392 const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, 393 FunctionImporter::ImportMapTy &ImportList, 394 StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) { 395 // Worklist contains the list of function imported in this module, for which 396 // we will analyse the callees and may import further down the callgraph. 
397 SmallVector<EdgeInfo, 128> Worklist; 398 399 // Populate the worklist with the import for the functions in the current 400 // module 401 for (auto &GVSummary : DefinedGVSummaries) { 402 if (!Index.isGlobalValueLive(GVSummary.second)) { 403 LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); 404 continue; 405 } 406 auto *FuncSummary = 407 dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject()); 408 if (!FuncSummary) 409 // Skip import for global variables 410 continue; 411 LLVM_DEBUG(dbgs() << "Initialize import for " << GVSummary.first << "\n"); 412 computeImportForFunction(*FuncSummary, Index, ImportInstrLimit, 413 DefinedGVSummaries, Worklist, ImportList, 414 ExportLists); 415 } 416 417 // Process the newly imported functions and add callees to the worklist. 418 while (!Worklist.empty()) { 419 auto FuncInfo = Worklist.pop_back_val(); 420 auto *Summary = std::get<0>(FuncInfo); 421 auto Threshold = std::get<1>(FuncInfo); 422 auto GUID = std::get<2>(FuncInfo); 423 424 // Check if we later added this summary with a higher threshold. 425 // If so, skip this entry. 
426 auto ExportModulePath = Summary->modulePath(); 427 auto &LatestProcessedThreshold = ImportList[ExportModulePath][GUID]; 428 if (LatestProcessedThreshold > Threshold) 429 continue; 430 431 computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries, 432 Worklist, ImportList, ExportLists); 433 } 434 } 435 436 #ifndef NDEBUG 437 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, 438 GlobalValue::GUID G) { 439 if (const auto &VI = Index.getValueInfo(G)) { 440 auto SL = VI.getSummaryList(); 441 if (!SL.empty()) 442 return SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind; 443 } 444 return false; 445 } 446 447 static GlobalValue::GUID getGUID(GlobalValue::GUID G) { return G; } 448 449 static GlobalValue::GUID 450 getGUID(const std::pair<const GlobalValue::GUID, unsigned> &P) { 451 return P.first; 452 } 453 454 template <class T> 455 static unsigned numGlobalVarSummaries(const ModuleSummaryIndex &Index, 456 T &Cont) { 457 unsigned NumGVS = 0; 458 for (auto &V : Cont) 459 if (isGlobalVarSummary(Index, getGUID(V))) 460 ++NumGVS; 461 return NumGVS; 462 } 463 #endif 464 465 /// Compute all the import and export for every module using the Index. 466 void llvm::ComputeCrossModuleImport( 467 const ModuleSummaryIndex &Index, 468 const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, 469 StringMap<FunctionImporter::ImportMapTy> &ImportLists, 470 StringMap<FunctionImporter::ExportSetTy> &ExportLists) { 471 // For each module that has function defined, compute the import/export lists. 472 for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { 473 auto &ImportList = ImportLists[DefinedGVSummaries.first()]; 474 LLVM_DEBUG(dbgs() << "Computing import for Module '" 475 << DefinedGVSummaries.first() << "'\n"); 476 ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, 477 &ExportLists); 478 } 479 480 // When computing imports we added all GUIDs referenced by anything 481 // imported from the module to its ExportList. 
Now we prune each ExportList 482 // of any not defined in that module. This is more efficient than checking 483 // while computing imports because some of the summary lists may be long 484 // due to linkonce (comdat) copies. 485 for (auto &ELI : ExportLists) { 486 const auto &DefinedGVSummaries = 487 ModuleToDefinedGVSummaries.lookup(ELI.first()); 488 for (auto EI = ELI.second.begin(); EI != ELI.second.end();) { 489 if (!DefinedGVSummaries.count(*EI)) 490 EI = ELI.second.erase(EI); 491 else 492 ++EI; 493 } 494 } 495 496 #ifndef NDEBUG 497 LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size() 498 << " modules:\n"); 499 for (auto &ModuleImports : ImportLists) { 500 auto ModName = ModuleImports.first(); 501 auto &Exports = ExportLists[ModName]; 502 unsigned NumGVS = numGlobalVarSummaries(Index, Exports); 503 LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports " 504 << Exports.size() - NumGVS << " functions and " << NumGVS 505 << " vars. Imports from " << ModuleImports.second.size() 506 << " modules.\n"); 507 for (auto &Src : ModuleImports.second) { 508 auto SrcModName = Src.first(); 509 unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second); 510 LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod 511 << " functions imported from " << SrcModName << "\n"); 512 LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod 513 << " global vars imported from " << SrcModName << "\n"); 514 } 515 } 516 #endif 517 } 518 519 #ifndef NDEBUG 520 static void dumpImportListForModule(const ModuleSummaryIndex &Index, 521 StringRef ModulePath, 522 FunctionImporter::ImportMapTy &ImportList) { 523 LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from " 524 << ImportList.size() << " modules.\n"); 525 for (auto &Src : ImportList) { 526 auto SrcModName = Src.first(); 527 unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second); 528 LLVM_DEBUG(dbgs() << " - " << Src.second.size() - NumGVSPerMod 529 << " functions imported from " << SrcModName << 
"\n"); 530 LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from " 531 << SrcModName << "\n"); 532 } 533 } 534 #endif 535 536 /// Compute all the imports for the given module in the Index. 537 void llvm::ComputeCrossModuleImportForModule( 538 StringRef ModulePath, const ModuleSummaryIndex &Index, 539 FunctionImporter::ImportMapTy &ImportList) { 540 // Collect the list of functions this module defines. 541 // GUID -> Summary 542 GVSummaryMapTy FunctionSummaryMap; 543 Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap); 544 545 // Compute the import list for this module. 546 LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n"); 547 ComputeImportForModule(FunctionSummaryMap, Index, ImportList); 548 549 #ifndef NDEBUG 550 dumpImportListForModule(Index, ModulePath, ImportList); 551 #endif 552 } 553 554 // Mark all external summaries in Index for import into the given module. 555 // Used for distributed builds using a distributed index. 556 void llvm::ComputeCrossModuleImportForModuleFromIndex( 557 StringRef ModulePath, const ModuleSummaryIndex &Index, 558 FunctionImporter::ImportMapTy &ImportList) { 559 for (auto &GlobalList : Index) { 560 // Ignore entries for undefined references. 561 if (GlobalList.second.SummaryList.empty()) 562 continue; 563 564 auto GUID = GlobalList.first; 565 assert(GlobalList.second.SummaryList.size() == 1 && 566 "Expected individual combined index to have one summary per GUID"); 567 auto &Summary = GlobalList.second.SummaryList[0]; 568 // Skip the summaries for the importing module. These are included to 569 // e.g. record required linkage changes. 570 if (Summary->modulePath() == ModulePath) 571 continue; 572 // Doesn't matter what value we plug in to the map, just needs an entry 573 // to provoke importing by thinBackend. 
574 ImportList[Summary->modulePath()][GUID] = 1; 575 } 576 #ifndef NDEBUG 577 dumpImportListForModule(Index, ModulePath, ImportList); 578 #endif 579 } 580 581 void llvm::computeDeadSymbols( 582 ModuleSummaryIndex &Index, 583 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, 584 function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) { 585 assert(!Index.withGlobalValueDeadStripping()); 586 if (!ComputeDead) 587 return; 588 if (GUIDPreservedSymbols.empty()) 589 // Don't do anything when nothing is live, this is friendly with tests. 590 return; 591 unsigned LiveSymbols = 0; 592 SmallVector<ValueInfo, 128> Worklist; 593 Worklist.reserve(GUIDPreservedSymbols.size() * 2); 594 for (auto GUID : GUIDPreservedSymbols) { 595 ValueInfo VI = Index.getValueInfo(GUID); 596 if (!VI) 597 continue; 598 for (auto &S : VI.getSummaryList()) 599 S->setLive(true); 600 } 601 602 // Add values flagged in the index as live roots to the worklist. 603 for (const auto &Entry : Index) 604 for (auto &S : Entry.second.SummaryList) 605 if (S->isLive()) { 606 LLVM_DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); 607 Worklist.push_back(ValueInfo(/*IsAnalysis=*/false, &Entry)); 608 ++LiveSymbols; 609 break; 610 } 611 612 // Make value live and add it to the worklist if it was not live before. 613 auto visit = [&](ValueInfo VI) { 614 // FIXME: If we knew which edges were created for indirect call profiles, 615 // we could skip them here. Any that are live should be reached via 616 // other edges, e.g. reference edges. Otherwise, using a profile collected 617 // on a slightly different binary might provoke preserving, importing 618 // and ultimately promoting calls to functions not linked into this 619 // binary, which increases the binary size unnecessarily. Note that 620 // if this code changes, the importer needs to change so that edges 621 // to functions marked dead are skipped. 
622 VI = updateValueInfoForIndirectCalls(Index, VI); 623 if (!VI) 624 return; 625 for (auto &S : VI.getSummaryList()) 626 if (S->isLive()) 627 return; 628 629 // We only keep live symbols that are known to be non-prevailing if any are 630 // available_externally. Those symbols are discarded later in the 631 // EliminateAvailableExternally pass and setting them to not-live breaks 632 // downstreams users of liveness information (PR36483). 633 if (isPrevailing(VI.getGUID()) == PrevailingType::No) { 634 bool AvailableExternally = false; 635 bool Interposable = false; 636 for (auto &S : VI.getSummaryList()) { 637 if (S->linkage() == GlobalValue::AvailableExternallyLinkage) 638 AvailableExternally = true; 639 else if (GlobalValue::isInterposableLinkage(S->linkage())) 640 Interposable = true; 641 } 642 643 if (!AvailableExternally) 644 return; 645 646 if (Interposable) 647 report_fatal_error("Interposable and available_externally symbol"); 648 } 649 650 for (auto &S : VI.getSummaryList()) 651 S->setLive(true); 652 ++LiveSymbols; 653 Worklist.push_back(VI); 654 }; 655 656 while (!Worklist.empty()) { 657 auto VI = Worklist.pop_back_val(); 658 for (auto &Summary : VI.getSummaryList()) { 659 GlobalValueSummary *Base = Summary->getBaseObject(); 660 // Set base value live in case it is an alias. 661 Base->setLive(true); 662 for (auto Ref : Base->refs()) 663 visit(Ref); 664 if (auto *FS = dyn_cast<FunctionSummary>(Base)) 665 for (auto Call : FS->calls()) 666 visit(Call.first); 667 } 668 } 669 Index.setWithGlobalValueDeadStripping(); 670 671 unsigned DeadSymbols = Index.size() - LiveSymbols; 672 LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols 673 << " symbols Dead \n"); 674 NumDeadSymbols += DeadSymbols; 675 NumLiveSymbols += LiveSymbols; 676 } 677 678 /// Compute the set of summaries needed for a ThinLTO backend compilation of 679 /// \p ModulePath. 
680 void llvm::gatherImportedSummariesForModule( 681 StringRef ModulePath, 682 const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, 683 const FunctionImporter::ImportMapTy &ImportList, 684 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) { 685 // Include all summaries from the importing module. 686 ModuleToSummariesForIndex[ModulePath] = 687 ModuleToDefinedGVSummaries.lookup(ModulePath); 688 // Include summaries for imports. 689 for (auto &ILI : ImportList) { 690 auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()]; 691 const auto &DefinedGVSummaries = 692 ModuleToDefinedGVSummaries.lookup(ILI.first()); 693 for (auto &GI : ILI.second) { 694 const auto &DS = DefinedGVSummaries.find(GI.first); 695 assert(DS != DefinedGVSummaries.end() && 696 "Expected a defined summary for imported global value"); 697 SummariesForIndex[GI.first] = DS->second; 698 } 699 } 700 } 701 702 /// Emit the files \p ModulePath will import from into \p OutputFilename. 703 std::error_code 704 llvm::EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename, 705 const FunctionImporter::ImportMapTy &ModuleImports) { 706 std::error_code EC; 707 raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None); 708 if (EC) 709 return EC; 710 for (auto &ILI : ModuleImports) 711 ImportsOS << ILI.first() << "\n"; 712 return std::error_code(); 713 } 714 715 bool llvm::convertToDeclaration(GlobalValue &GV) { 716 LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() 717 << "\n"); 718 if (Function *F = dyn_cast<Function>(&GV)) { 719 F->deleteBody(); 720 F->clearMetadata(); 721 F->setComdat(nullptr); 722 } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) { 723 V->setInitializer(nullptr); 724 V->setLinkage(GlobalValue::ExternalLinkage); 725 V->clearMetadata(); 726 V->setComdat(nullptr); 727 } else { 728 GlobalValue *NewGV; 729 if (GV.getValueType()->isFunctionTy()) 730 NewGV = 731 
Function::Create(cast<FunctionType>(GV.getValueType()), 732 GlobalValue::ExternalLinkage, "", GV.getParent()); 733 else 734 NewGV = 735 new GlobalVariable(*GV.getParent(), GV.getValueType(), 736 /*isConstant*/ false, GlobalValue::ExternalLinkage, 737 /*init*/ nullptr, "", 738 /*insertbefore*/ nullptr, GV.getThreadLocalMode(), 739 GV.getType()->getAddressSpace()); 740 NewGV->takeName(&GV); 741 GV.replaceAllUsesWith(NewGV); 742 return false; 743 } 744 return true; 745 } 746 747 /// Fixup WeakForLinker linkages in \p TheModule based on summary analysis. 748 void llvm::thinLTOResolveWeakForLinkerModule( 749 Module &TheModule, const GVSummaryMapTy &DefinedGlobals) { 750 auto updateLinkage = [&](GlobalValue &GV) { 751 // See if the global summary analysis computed a new resolved linkage. 752 const auto &GS = DefinedGlobals.find(GV.getGUID()); 753 if (GS == DefinedGlobals.end()) 754 return; 755 auto NewLinkage = GS->second->linkage(); 756 if (NewLinkage == GV.getLinkage()) 757 return; 758 759 // Switch the linkage to weakany if asked for, e.g. we do this for 760 // linker redefined symbols (via --wrap or --defsym). 761 // We record that the visibility should be changed here in `addThinLTO` 762 // as we need access to the resolution vectors for each input file in 763 // order to find which symbols have been redefined. 764 // We may consider reorganizing this code and moving the linkage recording 765 // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex. 766 if (NewLinkage == GlobalValue::WeakAnyLinkage) { 767 GV.setLinkage(NewLinkage); 768 return; 769 } 770 771 if (!GlobalValue::isWeakForLinker(GV.getLinkage())) 772 return; 773 // Check for a non-prevailing def that has interposable linkage 774 // (e.g. non-odr weak or linkonce). In that case we can't simply 775 // convert to available_externally, since it would lose the 776 // interposable property and possibly get inlined. Simply drop 777 // the definition in that case. 
778 if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) && 779 GlobalValue::isInterposableLinkage(GV.getLinkage())) { 780 if (!convertToDeclaration(GV)) 781 // FIXME: Change this to collect replaced GVs and later erase 782 // them from the parent module once thinLTOResolveWeakForLinkerGUID is 783 // changed to enable this for aliases. 784 llvm_unreachable("Expected GV to be converted"); 785 } else { 786 // If the original symbols has global unnamed addr and linkonce_odr linkage, 787 // it should be an auto hide symbol. Add hidden visibility to the symbol to 788 // preserve the property. 789 if (GV.hasLinkOnceODRLinkage() && GV.hasGlobalUnnamedAddr() && 790 NewLinkage == GlobalValue::WeakODRLinkage) 791 GV.setVisibility(GlobalValue::HiddenVisibility); 792 793 LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() 794 << "` from " << GV.getLinkage() << " to " << NewLinkage 795 << "\n"); 796 GV.setLinkage(NewLinkage); 797 } 798 // Remove declarations from comdats, including available_externally 799 // as this is a declaration for the linker, and will be dropped eventually. 800 // It is illegal for comdats to contain declarations. 801 auto *GO = dyn_cast_or_null<GlobalObject>(&GV); 802 if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) 803 GO->setComdat(nullptr); 804 }; 805 806 // Process functions and global now 807 for (auto &GV : TheModule) 808 updateLinkage(GV); 809 for (auto &GV : TheModule.globals()) 810 updateLinkage(GV); 811 for (auto &GV : TheModule.aliases()) 812 updateLinkage(GV); 813 } 814 815 /// Run internalization on \p TheModule based on symmary analysis. 816 void llvm::thinLTOInternalizeModule(Module &TheModule, 817 const GVSummaryMapTy &DefinedGlobals) { 818 // Declare a callback for the internalize pass that will ask for every 819 // candidate GlobalValue if it can be internalized or not. 820 auto MustPreserveGV = [&](const GlobalValue &GV) -> bool { 821 // Lookup the linkage recorded in the summaries during global analysis. 
822 auto GS = DefinedGlobals.find(GV.getGUID()); 823 if (GS == DefinedGlobals.end()) { 824 // Must have been promoted (possibly conservatively). Find original 825 // name so that we can access the correct summary and see if it can 826 // be internalized again. 827 // FIXME: Eventually we should control promotion instead of promoting 828 // and internalizing again. 829 StringRef OrigName = 830 ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName()); 831 std::string OrigId = GlobalValue::getGlobalIdentifier( 832 OrigName, GlobalValue::InternalLinkage, 833 TheModule.getSourceFileName()); 834 GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId)); 835 if (GS == DefinedGlobals.end()) { 836 // Also check the original non-promoted non-globalized name. In some 837 // cases a preempted weak value is linked in as a local copy because 838 // it is referenced by an alias (IRLinker::linkGlobalValueProto). 839 // In that case, since it was originally not a local value, it was 840 // recorded in the index using the original name. 841 // FIXME: This may not be needed once PR27866 is fixed. 842 GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName)); 843 assert(GS != DefinedGlobals.end()); 844 } 845 } 846 return !GlobalValue::isLocalLinkage(GS->second->linkage()); 847 }; 848 849 // FIXME: See if we can just internalize directly here via linkage changes 850 // based on the index, rather than invoking internalizeModule. 851 internalizeModule(TheModule, MustPreserveGV); 852 } 853 854 /// Make alias a clone of its aliasee. 855 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) { 856 Function *Fn = cast<Function>(GA->getBaseObject()); 857 858 ValueToValueMapTy VMap; 859 Function *NewFn = CloneFunction(Fn, VMap); 860 // Clone should use the original alias's linkage and name, and we ensure 861 // all uses of alias instead use the new clone (casted if necessary). 
862 NewFn->setLinkage(GA->getLinkage()); 863 GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, GA->getType())); 864 NewFn->takeName(GA); 865 return NewFn; 866 } 867 868 // Automatically import functions in Module \p DestModule based on the summaries 869 // index. 870 Expected<bool> FunctionImporter::importFunctions( 871 Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) { 872 LLVM_DEBUG(dbgs() << "Starting import for Module " 873 << DestModule.getModuleIdentifier() << "\n"); 874 unsigned ImportedCount = 0, ImportedGVCount = 0; 875 876 IRMover Mover(DestModule); 877 // Do the actual import of functions now, one Module at a time 878 std::set<StringRef> ModuleNameOrderedList; 879 for (auto &FunctionsToImportPerModule : ImportList) { 880 ModuleNameOrderedList.insert(FunctionsToImportPerModule.first()); 881 } 882 for (auto &Name : ModuleNameOrderedList) { 883 // Get the module for the import 884 const auto &FunctionsToImportPerModule = ImportList.find(Name); 885 assert(FunctionsToImportPerModule != ImportList.end()); 886 Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name); 887 if (!SrcModuleOrErr) 888 return SrcModuleOrErr.takeError(); 889 std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr); 890 assert(&DestModule.getContext() == &SrcModule->getContext() && 891 "Context mismatch"); 892 893 // If modules were created with lazy metadata loading, materialize it 894 // now, before linking it (otherwise this will be a noop). 895 if (Error Err = SrcModule->materializeMetadata()) 896 return std::move(Err); 897 898 auto &ImportGUIDs = FunctionsToImportPerModule->second; 899 // Find the globals to import 900 SetVector<GlobalValue *> GlobalsToImport; 901 for (Function &F : *SrcModule) { 902 if (!F.hasName()) 903 continue; 904 auto GUID = F.getGUID(); 905 auto Import = ImportGUIDs.count(GUID); 906 LLVM_DEBUG(dbgs() << (Import ? 
"Is" : "Not") << " importing function " 907 << GUID << " " << F.getName() << " from " 908 << SrcModule->getSourceFileName() << "\n"); 909 if (Import) { 910 if (Error Err = F.materialize()) 911 return std::move(Err); 912 if (EnableImportMetadata) { 913 // Add 'thinlto_src_module' metadata for statistics and debugging. 914 F.setMetadata( 915 "thinlto_src_module", 916 MDNode::get(DestModule.getContext(), 917 {MDString::get(DestModule.getContext(), 918 SrcModule->getSourceFileName())})); 919 } 920 GlobalsToImport.insert(&F); 921 } 922 } 923 for (GlobalVariable &GV : SrcModule->globals()) { 924 if (!GV.hasName()) 925 continue; 926 auto GUID = GV.getGUID(); 927 auto Import = ImportGUIDs.count(GUID); 928 LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing global " 929 << GUID << " " << GV.getName() << " from " 930 << SrcModule->getSourceFileName() << "\n"); 931 if (Import) { 932 if (Error Err = GV.materialize()) 933 return std::move(Err); 934 ImportedGVCount += GlobalsToImport.insert(&GV); 935 } 936 } 937 for (GlobalAlias &GA : SrcModule->aliases()) { 938 if (!GA.hasName()) 939 continue; 940 auto GUID = GA.getGUID(); 941 auto Import = ImportGUIDs.count(GUID); 942 LLVM_DEBUG(dbgs() << (Import ? "Is" : "Not") << " importing alias " 943 << GUID << " " << GA.getName() << " from " 944 << SrcModule->getSourceFileName() << "\n"); 945 if (Import) { 946 if (Error Err = GA.materialize()) 947 return std::move(Err); 948 // Import alias as a copy of its aliasee. 949 GlobalObject *Base = GA.getBaseObject(); 950 if (Error Err = Base->materialize()) 951 return std::move(Err); 952 auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA); 953 LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID() 954 << " " << Base->getName() << " from " 955 << SrcModule->getSourceFileName() << "\n"); 956 if (EnableImportMetadata) { 957 // Add 'thinlto_src_module' metadata for statistics and debugging. 
958 Fn->setMetadata( 959 "thinlto_src_module", 960 MDNode::get(DestModule.getContext(), 961 {MDString::get(DestModule.getContext(), 962 SrcModule->getSourceFileName())})); 963 } 964 GlobalsToImport.insert(Fn); 965 } 966 } 967 968 // Upgrade debug info after we're done materializing all the globals and we 969 // have loaded all the required metadata! 970 UpgradeDebugInfo(*SrcModule); 971 972 // Link in the specified functions. 973 if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport)) 974 return true; 975 976 if (PrintImports) { 977 for (const auto *GV : GlobalsToImport) 978 dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName() 979 << " from " << SrcModule->getSourceFileName() << "\n"; 980 } 981 982 if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(), 983 [](GlobalValue &, IRMover::ValueAdder) {}, 984 /*IsPerformingImport=*/true)) 985 report_fatal_error("Function Import: link error"); 986 987 ImportedCount += GlobalsToImport.size(); 988 NumImportedModules++; 989 } 990 991 NumImportedFunctions += (ImportedCount - ImportedGVCount); 992 NumImportedGlobalVars += ImportedGVCount; 993 994 LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount 995 << " functions for Module " 996 << DestModule.getModuleIdentifier() << "\n"); 997 LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount 998 << " global variables for Module " 999 << DestModule.getModuleIdentifier() << "\n"); 1000 return ImportedCount; 1001 } 1002 1003 static bool doImportingForModule(Module &M) { 1004 if (SummaryFile.empty()) 1005 report_fatal_error("error: -function-import requires -summary-file\n"); 1006 Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr = 1007 getModuleSummaryIndexForFile(SummaryFile); 1008 if (!IndexPtrOrErr) { 1009 logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(), 1010 "Error loading file '" + SummaryFile + "': "); 1011 return false; 1012 } 1013 std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr); 1014 
1015 // First step is collecting the import list. 1016 FunctionImporter::ImportMapTy ImportList; 1017 // If requested, simply import all functions in the index. This is used 1018 // when testing distributed backend handling via the opt tool, when 1019 // we have distributed indexes containing exactly the summaries to import. 1020 if (ImportAllIndex) 1021 ComputeCrossModuleImportForModuleFromIndex(M.getModuleIdentifier(), *Index, 1022 ImportList); 1023 else 1024 ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index, 1025 ImportList); 1026 1027 // Conservatively mark all internal values as promoted. This interface is 1028 // only used when doing importing via the function importing pass. The pass 1029 // is only enabled when testing importing via the 'opt' tool, which does 1030 // not do the ThinLink that would normally determine what values to promote. 1031 for (auto &I : *Index) { 1032 for (auto &S : I.second.SummaryList) { 1033 if (GlobalValue::isLocalLinkage(S->linkage())) 1034 S->setLinkage(GlobalValue::ExternalLinkage); 1035 } 1036 } 1037 1038 // Next we need to promote to global scope and rename any local values that 1039 // are potentially exported to other modules. 1040 if (renameModuleForThinLTO(M, *Index, nullptr)) { 1041 errs() << "Error renaming module\n"; 1042 return false; 1043 } 1044 1045 // Perform the import now. 1046 auto ModuleLoader = [&M](StringRef Identifier) { 1047 return loadFile(Identifier, M.getContext()); 1048 }; 1049 FunctionImporter Importer(*Index, ModuleLoader); 1050 Expected<bool> Result = Importer.importFunctions(M, ImportList); 1051 1052 // FIXME: Probably need to propagate Errors through the pass manager. 1053 if (!Result) { 1054 logAllUnhandledErrors(Result.takeError(), errs(), 1055 "Error importing module: "); 1056 return false; 1057 } 1058 1059 return *Result; 1060 } 1061 1062 namespace { 1063 1064 /// Pass that performs cross-module function import provided a summary file. 
1065 class FunctionImportLegacyPass : public ModulePass { 1066 public: 1067 /// Pass identification, replacement for typeid 1068 static char ID; 1069 1070 explicit FunctionImportLegacyPass() : ModulePass(ID) {} 1071 1072 /// Specify pass name for debug output 1073 StringRef getPassName() const override { return "Function Importing"; } 1074 1075 bool runOnModule(Module &M) override { 1076 if (skipModule(M)) 1077 return false; 1078 1079 return doImportingForModule(M); 1080 } 1081 }; 1082 1083 } // end anonymous namespace 1084 1085 PreservedAnalyses FunctionImportPass::run(Module &M, 1086 ModuleAnalysisManager &AM) { 1087 if (!doImportingForModule(M)) 1088 return PreservedAnalyses::all(); 1089 1090 return PreservedAnalyses::none(); 1091 } 1092 1093 char FunctionImportLegacyPass::ID = 0; 1094 INITIALIZE_PASS(FunctionImportLegacyPass, "function-import", 1095 "Summary Based Function Import", false, false) 1096 1097 namespace llvm { 1098 1099 Pass *createFunctionImportPass() { 1100 return new FunctionImportLegacyPass(); 1101 } 1102 1103 } // end namespace llvm 1104