1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Thin Link Time Optimization library. This library is 11 // intended to be used by linker to optimize code at link time. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" 16 17 #ifdef HAVE_LLVM_REVISION 18 #include "LLVMLTORevision.h" 19 #endif 20 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/ADT/StringExtras.h" 23 #include "llvm/Analysis/ModuleSummaryAnalysis.h" 24 #include "llvm/Analysis/ProfileSummaryInfo.h" 25 #include "llvm/Analysis/TargetLibraryInfo.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Bitcode/BitcodeWriterPass.h" 28 #include "llvm/Bitcode/ReaderWriter.h" 29 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" 30 #include "llvm/IR/DiagnosticPrinter.h" 31 #include "llvm/IR/LLVMContext.h" 32 #include "llvm/IR/LegacyPassManager.h" 33 #include "llvm/IR/Mangler.h" 34 #include "llvm/IRReader/IRReader.h" 35 #include "llvm/LTO/LTO.h" 36 #include "llvm/Linker/Linker.h" 37 #include "llvm/MC/SubtargetFeature.h" 38 #include "llvm/Object/IRObjectFile.h" 39 #include "llvm/Object/ModuleSummaryIndexObjectFile.h" 40 #include "llvm/Support/CachePruning.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/Path.h" 43 #include "llvm/Support/SHA1.h" 44 #include "llvm/Support/TargetRegistry.h" 45 #include "llvm/Support/ThreadPool.h" 46 #include "llvm/Support/Threading.h" 47 #include "llvm/Target/TargetMachine.h" 48 #include "llvm/Transforms/IPO.h" 49 #include "llvm/Transforms/IPO/FunctionImport.h" 50 #include "llvm/Transforms/IPO/Internalize.h" 51 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 52 #include "llvm/Transforms/ObjCARC.h" 53 #include "llvm/Transforms/Utils/FunctionImportUtils.h" 54 55 #include <numeric> 56 57 using namespace llvm; 58 59 #define DEBUG_TYPE "thinlto" 60 61 namespace llvm { 62 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp 63 extern cl::opt<bool> LTODiscardValueNames; 64 } 65 66 namespace { 67 68 static cl::opt<int> 69 ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency())); 70 71 static void diagnosticHandler(const DiagnosticInfo &DI) { 72 DiagnosticPrinterRawOStream DP(errs()); 73 DI.print(DP); 74 errs() << '\n'; 75 } 76 77 // Simple helper to save temporary files for debug. 78 static void saveTempBitcode(const Module &TheModule, StringRef TempDir, 79 unsigned count, StringRef Suffix) { 80 if (TempDir.empty()) 81 return; 82 // User asked to save temps, let dump the bitcode file after import. 83 std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str(); 84 std::error_code EC; 85 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); 86 if (EC) 87 report_fatal_error(Twine("Failed to open ") + SaveTempPath + 88 " to save optimized bitcode\n"); 89 WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true); 90 } 91 92 static const GlobalValueSummary * 93 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) { 94 // If there is any strong definition anywhere, get it. 95 auto StrongDefForLinker = llvm::find_if( 96 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) { 97 auto Linkage = Summary->linkage(); 98 return !GlobalValue::isAvailableExternallyLinkage(Linkage) && 99 !GlobalValue::isWeakForLinker(Linkage); 100 }); 101 if (StrongDefForLinker != GVSummaryList.end()) 102 return StrongDefForLinker->get(); 103 // Get the first *linker visible* definition for this global in the summary 104 // list. 105 auto FirstDefForLinker = llvm::find_if( 106 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) { 107 auto Linkage = Summary->linkage(); 108 return !GlobalValue::isAvailableExternallyLinkage(Linkage); 109 }); 110 // Extern templates can be emitted as available_externally. 111 if (FirstDefForLinker == GVSummaryList.end()) 112 return nullptr; 113 return FirstDefForLinker->get(); 114 } 115 116 // Populate map of GUID to the prevailing copy for any multiply defined 117 // symbols. Currently assume first copy is prevailing, or any strong 118 // definition. Can be refined with Linker information in the future. 119 static void computePrevailingCopies( 120 const ModuleSummaryIndex &Index, 121 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) { 122 auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) { 123 return GVSummaryList.size() > 1; 124 }; 125 126 for (auto &I : Index) { 127 if (HasMultipleCopies(I.second)) 128 PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second); 129 } 130 } 131 132 static StringMap<MemoryBufferRef> 133 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { 134 StringMap<MemoryBufferRef> ModuleMap; 135 for (auto &ModuleBuffer : Modules) { 136 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == 137 ModuleMap.end() && 138 "Expect unique Buffer Identifier"); 139 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; 140 } 141 return ModuleMap; 142 } 143 144 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { 145 if (renameModuleForThinLTO(TheModule, Index)) 146 report_fatal_error("renameModuleForThinLTO failed"); 147 } 148 149 static void 150 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, 151 StringMap<MemoryBufferRef> &ModuleMap, 152 const FunctionImporter::ImportMapTy &ImportList) { 153 ModuleLoader Loader(TheModule.getContext(), ModuleMap); 154 FunctionImporter Importer(Index, Loader); 155 Importer.importFunctions(TheModule, ImportList); 156 } 157 158 static void optimizeModule(Module &TheModule, TargetMachine &TM) { 159 // Populate the PassManager 160 PassManagerBuilder PMB; 161 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); 162 PMB.Inliner = createFunctionInliningPass(); 163 // FIXME: should get it from the bitcode? 164 PMB.OptLevel = 3; 165 PMB.LoopVectorize = true; 166 PMB.SLPVectorize = true; 167 PMB.VerifyInput = true; 168 PMB.VerifyOutput = false; 169 170 legacy::PassManager PM; 171 172 // Add the TTI (required to inform the vectorizer about register size for 173 // instance) 174 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); 175 176 // Add optimizations 177 PMB.populateThinLTOPassManager(PM); 178 179 PM.run(TheModule); 180 } 181 182 // Convert the PreservedSymbols map from "Name" based to "GUID" based. 183 static DenseSet<GlobalValue::GUID> 184 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols, 185 const Triple &TheTriple) { 186 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size()); 187 for (auto &Entry : PreservedSymbols) { 188 StringRef Name = Entry.first(); 189 if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_') 190 Name = Name.drop_front(); 191 GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name)); 192 } 193 return GUIDPreservedSymbols; 194 } 195 196 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, 197 TargetMachine &TM) { 198 SmallVector<char, 128> OutputBuffer; 199 200 // CodeGen 201 { 202 raw_svector_ostream OS(OutputBuffer); 203 legacy::PassManager PM; 204 205 // If the bitcode files contain ARC code and were compiled with optimization, 206 // the ObjCARCContractPass must be run, so do it unconditionally here. 207 PM.add(createObjCARCContractPass()); 208 209 // Setup the codegen now. 210 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile, 211 /* DisableVerify */ true)) 212 report_fatal_error("Failed to setup codegen"); 213 214 // Run codegen now. resulting binary is in OutputBuffer. 215 PM.run(TheModule); 216 } 217 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer)); 218 } 219 220 /// Manage caching for a single Module. 221 class ModuleCacheEntry { 222 SmallString<128> EntryPath; 223 224 public: 225 // Create a cache entry. This compute a unique hash for the Module considering 226 // the current list of export/import, and offer an interface to query to 227 // access the content in the cache. 228 ModuleCacheEntry( 229 StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID, 230 const FunctionImporter::ImportMapTy &ImportList, 231 const FunctionImporter::ExportSetTy &ExportList, 232 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 233 const GVSummaryMapTy &DefinedFunctions, 234 const DenseSet<GlobalValue::GUID> &PreservedSymbols) { 235 if (CachePath.empty()) 236 return; 237 238 if (!Index.modulePaths().count(ModuleID)) 239 // The module does not have an entry, it can't have a hash at all 240 return; 241 242 // Compute the unique hash for this entry 243 // This is based on the current compiler version, the module itself, the 244 // export list, the hash for every single module in the import list, the 245 // list of ResolvedODR for the module, and the list of preserved symbols. 246 247 // Include the hash for the current module 248 auto ModHash = Index.getModuleHash(ModuleID); 249 250 if (all_of(ModHash, [](uint32_t V) { return V == 0; })) 251 // No hash entry, no caching! 252 return; 253 254 SHA1 Hasher; 255 256 // Start with the compiler revision 257 Hasher.update(LLVM_VERSION_STRING); 258 #ifdef HAVE_LLVM_REVISION 259 Hasher.update(LLVM_REVISION); 260 #endif 261 262 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); 263 for (auto F : ExportList) 264 // The export list can impact the internalization, be conservative here 265 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F))); 266 267 // Include the hash for every module we import functions from 268 for (auto &Entry : ImportList) { 269 auto ModHash = Index.getModuleHash(Entry.first()); 270 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash))); 271 } 272 273 // Include the hash for the resolved ODR. 274 for (auto &Entry : ResolvedODR) { 275 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first, 276 sizeof(GlobalValue::GUID))); 277 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second, 278 sizeof(GlobalValue::LinkageTypes))); 279 } 280 281 // Include the hash for the preserved symbols. 282 for (auto &Entry : PreservedSymbols) { 283 if (DefinedFunctions.count(Entry)) 284 Hasher.update( 285 ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID))); 286 } 287 288 sys::path::append(EntryPath, CachePath, toHex(Hasher.result())); 289 } 290 291 // Access the path to this entry in the cache. 292 StringRef getEntryPath() { return EntryPath; } 293 294 // Try loading the buffer for this cache entry. 295 ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() { 296 if (EntryPath.empty()) 297 return std::error_code(); 298 return MemoryBuffer::getFile(EntryPath); 299 } 300 301 // Cache the Produced object file 302 std::unique_ptr<MemoryBuffer> 303 write(std::unique_ptr<MemoryBuffer> OutputBuffer) { 304 if (EntryPath.empty()) 305 return OutputBuffer; 306 307 // Write to a temporary to avoid race condition 308 SmallString<128> TempFilename; 309 int TempFD; 310 std::error_code EC = 311 sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename); 312 if (EC) { 313 errs() << "Error: " << EC.message() << "\n"; 314 report_fatal_error("ThinLTO: Can't get a temporary file"); 315 } 316 { 317 raw_fd_ostream OS(TempFD, /* ShouldClose */ true); 318 OS << OutputBuffer->getBuffer(); 319 } 320 // Rename to final destination (hopefully race condition won't matter here) 321 EC = sys::fs::rename(TempFilename, EntryPath); 322 if (EC) { 323 sys::fs::remove(TempFilename); 324 raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None); 325 if (EC) 326 report_fatal_error(Twine("Failed to open ") + EntryPath + 327 " to save cached entry\n"); 328 OS << OutputBuffer->getBuffer(); 329 } 330 auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath); 331 if (auto EC = ReloadedBufferOrErr.getError()) { 332 // FIXME diagnose 333 errs() << "error: can't reload cached file '" << EntryPath 334 << "': " << EC.message() << "\n"; 335 return OutputBuffer; 336 } 337 return std::move(*ReloadedBufferOrErr); 338 } 339 }; 340 341 static std::unique_ptr<MemoryBuffer> 342 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, 343 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM, 344 const FunctionImporter::ImportMapTy &ImportList, 345 const FunctionImporter::ExportSetTy &ExportList, 346 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, 347 const GVSummaryMapTy &DefinedGlobals, 348 const ThinLTOCodeGenerator::CachingOptions &CacheOptions, 349 bool DisableCodeGen, StringRef SaveTempsDir, 350 unsigned count) { 351 352 // "Benchmark"-like optimization: single-source case 353 bool SingleModule = (ModuleMap.size() == 1); 354 355 if (!SingleModule) { 356 promoteModule(TheModule, Index); 357 358 // Apply summary-based LinkOnce/Weak resolution decisions. 359 thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals); 360 361 // Save temps: after promotion. 362 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc"); 363 } 364 365 // Be friendly and don't nuke totally the module when the client didn't 366 // supply anything to preserve. 367 if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) { 368 // Apply summary-based internalization decisions. 369 thinLTOInternalizeModule(TheModule, DefinedGlobals); 370 } 371 372 // Save internalized bitcode 373 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc"); 374 375 if (!SingleModule) { 376 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); 377 378 // Save temps: after cross-module import. 379 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); 380 } 381 382 optimizeModule(TheModule, TM); 383 384 saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); 385 386 if (DisableCodeGen) { 387 // Configured to stop before CodeGen, serialize the bitcode and return. 388 SmallVector<char, 128> OutputBuffer; 389 { 390 raw_svector_ostream OS(OutputBuffer); 391 ProfileSummaryInfo PSI(TheModule); 392 auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr); 393 WriteBitcodeToFile(&TheModule, OS, true, &Index); 394 } 395 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer)); 396 } 397 398 return codegenModule(TheModule, TM); 399 } 400 401 /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map 402 /// for caching, and in the \p Index for application during the ThinLTO 403 /// backends. This is needed for correctness for exported symbols (ensure 404 /// at least one copy kept) and a compile-time optimization (to drop duplicate 405 /// copies when possible). 406 static void resolveWeakForLinkerInIndex( 407 ModuleSummaryIndex &Index, 408 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> 409 &ResolvedODR) { 410 411 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy; 412 computePrevailingCopies(Index, PrevailingCopy); 413 414 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { 415 const auto &Prevailing = PrevailingCopy.find(GUID); 416 // Not in map means that there was only one copy, which must be prevailing. 417 if (Prevailing == PrevailingCopy.end()) 418 return true; 419 return Prevailing->second == S; 420 }; 421 422 auto recordNewLinkage = [&](StringRef ModuleIdentifier, 423 GlobalValue::GUID GUID, 424 GlobalValue::LinkageTypes NewLinkage) { 425 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; 426 }; 427 428 thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage); 429 } 430 431 // Initialize the TargetMachine builder for a given Triple 432 static void initTMBuilder(TargetMachineBuilder &TMBuilder, 433 const Triple &TheTriple) { 434 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator). 435 // FIXME this looks pretty terrible... 436 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) { 437 if (TheTriple.getArch() == llvm::Triple::x86_64) 438 TMBuilder.MCpu = "core2"; 439 else if (TheTriple.getArch() == llvm::Triple::x86) 440 TMBuilder.MCpu = "yonah"; 441 else if (TheTriple.getArch() == llvm::Triple::aarch64) 442 TMBuilder.MCpu = "cyclone"; 443 } 444 TMBuilder.TheTriple = std::move(TheTriple); 445 } 446 447 } // end anonymous namespace 448 449 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { 450 MemoryBufferRef Buffer(Data, Identifier); 451 if (Modules.empty()) { 452 // First module added, so initialize the triple and some options 453 LLVMContext Context; 454 Triple TheTriple(getBitcodeTargetTriple(Buffer, Context)); 455 initTMBuilder(TMBuilder, Triple(TheTriple)); 456 } 457 #ifndef NDEBUG 458 else { 459 LLVMContext Context; 460 assert(TMBuilder.TheTriple.str() == 461 getBitcodeTargetTriple(Buffer, Context) && 462 "ThinLTO modules with different triple not supported"); 463 } 464 #endif 465 Modules.push_back(Buffer); 466 } 467 468 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { 469 PreservedSymbols.insert(Name); 470 } 471 472 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { 473 // FIXME: At the moment, we don't take advantage of this extra information, 474 // we're conservatively considering cross-references as preserved. 475 // CrossReferencedSymbols.insert(Name); 476 PreservedSymbols.insert(Name); 477 } 478 479 // TargetMachine factory 480 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { 481 std::string ErrMsg; 482 const Target *TheTarget = 483 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg); 484 if (!TheTarget) { 485 report_fatal_error("Can't load target for this Triple: " + ErrMsg); 486 } 487 488 // Use MAttr as the default set of features. 489 SubtargetFeatures Features(MAttr); 490 Features.getDefaultSubtargetFeatures(TheTriple); 491 std::string FeatureStr = Features.getString(); 492 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( 493 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, 494 CodeModel::Default, CGOptLevel)); 495 } 496 497 /** 498 * Produce the combined summary index from all the bitcode files: 499 * "thin-link". 500 */ 501 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() { 502 std::unique_ptr<ModuleSummaryIndex> CombinedIndex; 503 uint64_t NextModuleId = 0; 504 for (auto &ModuleBuffer : Modules) { 505 ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr = 506 object::ModuleSummaryIndexObjectFile::create(ModuleBuffer, 507 diagnosticHandler); 508 if (std::error_code EC = ObjOrErr.getError()) { 509 // FIXME diagnose 510 errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: " 511 << EC.message() << "\n"; 512 return nullptr; 513 } 514 auto Index = (*ObjOrErr)->takeIndex(); 515 if (CombinedIndex) { 516 CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); 517 } else { 518 CombinedIndex = std::move(Index); 519 } 520 } 521 return CombinedIndex; 522 } 523 524 /** 525 * Perform promotion and renaming of exported internal functions. 526 * Index is updated to reflect linkage changes from weak resolution. 527 */ 528 void ThinLTOCodeGenerator::promote(Module &TheModule, 529 ModuleSummaryIndex &Index) { 530 auto ModuleCount = Index.modulePaths().size(); 531 auto ModuleIdentifier = TheModule.getModuleIdentifier(); 532 // Collect for each module the list of function it defines (GUID -> Summary). 533 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries; 534 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 535 536 // Generate import/export list 537 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 538 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 539 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, 540 ExportLists); 541 542 // Resolve LinkOnce/Weak symbols. 543 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; 544 resolveWeakForLinkerInIndex(Index, ResolvedODR); 545 546 thinLTOResolveWeakForLinkerModule( 547 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]); 548 549 promoteModule(TheModule, Index); 550 } 551 552 /** 553 * Perform cross-module importing for the module identified by ModuleIdentifier. 554 */ 555 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, 556 ModuleSummaryIndex &Index) { 557 auto ModuleMap = generateModuleMap(Modules); 558 auto ModuleCount = Index.modulePaths().size(); 559 560 // Collect for each module the list of function it defines (GUID -> Summary). 561 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); 562 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 563 564 // Generate import/export list 565 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 566 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 567 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, 568 ExportLists); 569 auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; 570 571 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); 572 } 573 574 /** 575 * Compute the list of summaries needed for importing into module. 576 */ 577 void ThinLTOCodeGenerator::gatherImportedSummariesForModule( 578 StringRef ModulePath, ModuleSummaryIndex &Index, 579 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) { 580 auto ModuleCount = Index.modulePaths().size(); 581 582 // Collect for each module the list of function it defines (GUID -> Summary). 583 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); 584 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 585 586 // Generate import/export list 587 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 588 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 589 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, 590 ExportLists); 591 592 llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries, 593 ImportLists[ModulePath], 594 ModuleToSummariesForIndex); 595 } 596 597 /** 598 * Emit the list of files needed for importing into module. 599 */ 600 void ThinLTOCodeGenerator::emitImports(StringRef ModulePath, 601 StringRef OutputName, 602 ModuleSummaryIndex &Index) { 603 auto ModuleCount = Index.modulePaths().size(); 604 605 // Collect for each module the list of function it defines (GUID -> Summary). 606 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); 607 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 608 609 // Generate import/export list 610 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 611 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 612 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, 613 ExportLists); 614 615 std::error_code EC; 616 if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath]))) 617 report_fatal_error(Twine("Failed to open ") + OutputName + 618 " to save imports lists\n"); 619 } 620 621 /** 622 * Perform internalization. Index is updated to reflect linkage changes. 623 */ 624 void ThinLTOCodeGenerator::internalize(Module &TheModule, 625 ModuleSummaryIndex &Index) { 626 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); 627 auto ModuleCount = Index.modulePaths().size(); 628 auto ModuleIdentifier = TheModule.getModuleIdentifier(); 629 630 // Convert the preserved symbols set from string to GUID 631 auto GUIDPreservedSymbols = 632 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); 633 634 // Collect for each module the list of function it defines (GUID -> Summary). 635 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); 636 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 637 638 // Generate import/export list 639 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 640 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 641 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, 642 ExportLists); 643 auto &ExportList = ExportLists[ModuleIdentifier]; 644 645 // Be friendly and don't nuke totally the module when the client didn't 646 // supply anything to preserve. 647 if (ExportList.empty() && GUIDPreservedSymbols.empty()) 648 return; 649 650 // Internalization 651 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { 652 const auto &ExportList = ExportLists.find(ModuleIdentifier); 653 return (ExportList != ExportLists.end() && 654 ExportList->second.count(GUID)) || 655 GUIDPreservedSymbols.count(GUID); 656 }; 657 thinLTOInternalizeAndPromoteInIndex(Index, isExported); 658 thinLTOInternalizeModule(TheModule, 659 ModuleToDefinedGVSummaries[ModuleIdentifier]); 660 } 661 662 /** 663 * Perform post-importing ThinLTO optimizations. 664 */ 665 void ThinLTOCodeGenerator::optimize(Module &TheModule) { 666 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); 667 668 // Optimize now 669 optimizeModule(TheModule, *TMBuilder.create()); 670 } 671 672 /** 673 * Perform ThinLTO CodeGen. 674 */ 675 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) { 676 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); 677 return codegenModule(TheModule, *TMBuilder.create()); 678 } 679 680 // Main entry point for the ThinLTO processing 681 void ThinLTOCodeGenerator::run() { 682 if (CodeGenOnly) { 683 // Perform only parallel codegen and return. 684 ThreadPool Pool; 685 assert(ProducedBinaries.empty() && "The generator should not be reused"); 686 ProducedBinaries.resize(Modules.size()); 687 int count = 0; 688 for (auto &ModuleBuffer : Modules) { 689 Pool.async([&](int count) { 690 LLVMContext Context; 691 Context.setDiscardValueNames(LTODiscardValueNames); 692 693 // Parse module now 694 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); 695 696 // CodeGen 697 ProducedBinaries[count] = codegen(*TheModule); 698 }, count++); 699 } 700 701 return; 702 } 703 704 // Sequential linking phase 705 auto Index = linkCombinedIndex(); 706 707 // Save temps: index. 708 if (!SaveTempsDir.empty()) { 709 auto SaveTempPath = SaveTempsDir + "index.bc"; 710 std::error_code EC; 711 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); 712 if (EC) 713 report_fatal_error(Twine("Failed to open ") + SaveTempPath + 714 " to save optimized bitcode\n"); 715 WriteIndexToFile(*Index, OS); 716 } 717 718 // Prepare the resulting object vector 719 assert(ProducedBinaries.empty() && "The generator should not be reused"); 720 ProducedBinaries.resize(Modules.size()); 721 722 // Prepare the module map. 723 auto ModuleMap = generateModuleMap(Modules); 724 auto ModuleCount = Modules.size(); 725 726 // Collect for each module the list of function it defines (GUID -> Summary). 727 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount); 728 Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); 729 730 // Collect the import/export lists for all modules from the call-graph in the 731 // combined index. 732 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount); 733 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount); 734 ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, 735 ExportLists); 736 737 // Convert the preserved symbols set from string to GUID, this is needed for 738 // computing the caching hash and the internalization. 739 auto GUIDPreservedSymbols = 740 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); 741 742 // We use a std::map here to be able to have a defined ordering when 743 // producing a hash for the cache entry. 744 // FIXME: we should be able to compute the caching hash for the entry based 745 // on the index, and nuke this map. 746 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; 747 748 // Resolve LinkOnce/Weak symbols, this has to be computed early because it 749 // impacts the caching. 750 resolveWeakForLinkerInIndex(*Index, ResolvedODR); 751 752 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { 753 const auto &ExportList = ExportLists.find(ModuleIdentifier); 754 return (ExportList != ExportLists.end() && 755 ExportList->second.count(GUID)) || 756 GUIDPreservedSymbols.count(GUID); 757 }; 758 759 // Use global summary-based analysis to identify symbols that can be 760 // internalized (because they aren't exported or preserved as per callback). 761 // Changes are made in the index, consumed in the ThinLTO backends. 762 thinLTOInternalizeAndPromoteInIndex(*Index, isExported); 763 764 // Make sure that every module has an entry in the ExportLists and 765 // ResolvedODR maps to enable threaded access to these maps below. 766 for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { 767 ExportLists[DefinedGVSummaries.first()]; 768 ResolvedODR[DefinedGVSummaries.first()]; 769 } 770 771 // Compute the ordering we will process the inputs: the rough heuristic here 772 // is to sort them per size so that the largest module get schedule as soon as 773 // possible. This is purely a compile-time optimization. 774 std::vector<int> ModulesOrdering; 775 ModulesOrdering.resize(Modules.size()); 776 std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0); 777 std::sort(ModulesOrdering.begin(), ModulesOrdering.end(), 778 [&](int LeftIndex, int RightIndex) { 779 auto LSize = Modules[LeftIndex].getBufferSize(); 780 auto RSize = Modules[RightIndex].getBufferSize(); 781 return LSize > RSize; 782 }); 783 784 // Parallel optimizer + codegen 785 { 786 ThreadPool Pool(ThreadCount); 787 for (auto IndexCount : ModulesOrdering) { 788 auto &ModuleBuffer = Modules[IndexCount]; 789 Pool.async([&](int count) { 790 auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier(); 791 auto &ExportList = ExportLists[ModuleIdentifier]; 792 793 auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier]; 794 795 // The module may be cached, this helps handling it. 796 ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier, 797 ImportLists[ModuleIdentifier], ExportList, 798 ResolvedODR[ModuleIdentifier], 799 DefinedFunctions, GUIDPreservedSymbols); 800 801 { 802 auto ErrOrBuffer = CacheEntry.tryLoadingBuffer(); 803 DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '" 804 << CacheEntry.getEntryPath() << "' for buffer " << count 805 << " " << ModuleIdentifier << "\n"); 806 807 if (ErrOrBuffer) { 808 // Cache Hit! 809 ProducedBinaries[count] = std::move(ErrOrBuffer.get()); 810 return; 811 } 812 } 813 814 LLVMContext Context; 815 Context.setDiscardValueNames(LTODiscardValueNames); 816 Context.enableDebugTypeODRUniquing(); 817 818 // Parse module now 819 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); 820 821 // Save temps: original file. 822 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); 823 824 auto &ImportList = ImportLists[ModuleIdentifier]; 825 // Run the main process now, and generates a binary 826 auto OutputBuffer = ProcessThinLTOModule( 827 *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList, 828 ExportList, GUIDPreservedSymbols, 829 ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions, 830 DisableCodeGen, SaveTempsDir, count); 831 832 OutputBuffer = CacheEntry.write(std::move(OutputBuffer)); 833 ProducedBinaries[count] = std::move(OutputBuffer); 834 }, IndexCount); 835 } 836 } 837 838 CachePruning(CacheOptions.Path) 839 .setPruningInterval(CacheOptions.PruningInterval) 840 .setEntryExpiration(CacheOptions.Expiration) 841 .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace) 842 .prune(); 843 844 // If statistics were requested, print them out now. 845 if (llvm::AreStatisticsEnabled()) 846 llvm::PrintStatistics(); 847 } 848