1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Thin Link Time Optimization library. This library is
10 // intended to be used by linker to optimize code at link time.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
15 #include "llvm/Support/CommandLine.h"
16
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/AliasAnalysis.h"
20 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
21 #include "llvm/Analysis/ProfileSummaryInfo.h"
22 #include "llvm/Analysis/TargetLibraryInfo.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/Bitcode/BitcodeWriter.h"
26 #include "llvm/Bitcode/BitcodeWriterPass.h"
27 #include "llvm/Config/llvm-config.h"
28 #include "llvm/IR/DebugInfo.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/LLVMRemarkStreamer.h"
32 #include "llvm/IR/LegacyPassManager.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/IR/PassTimingInfo.h"
35 #include "llvm/IR/Verifier.h"
36 #include "llvm/IRReader/IRReader.h"
37 #include "llvm/LTO/LTO.h"
38 #include "llvm/LTO/SummaryBasedOptimizations.h"
39 #include "llvm/MC/SubtargetFeature.h"
40 #include "llvm/Object/IRObjectFile.h"
41 #include "llvm/Passes/PassBuilder.h"
42 #include "llvm/Passes/StandardInstrumentations.h"
43 #include "llvm/Remarks/HotnessThresholdParser.h"
44 #include "llvm/Support/CachePruning.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/Error.h"
47 #include "llvm/Support/FileUtilities.h"
48 #include "llvm/Support/Path.h"
49 #include "llvm/Support/SHA1.h"
50 #include "llvm/Support/SmallVectorMemoryBuffer.h"
51 #include "llvm/Support/TargetRegistry.h"
52 #include "llvm/Support/ThreadPool.h"
53 #include "llvm/Support/Threading.h"
54 #include "llvm/Support/ToolOutputFile.h"
55 #include "llvm/Target/TargetMachine.h"
56 #include "llvm/Transforms/IPO.h"
57 #include "llvm/Transforms/IPO/FunctionImport.h"
58 #include "llvm/Transforms/IPO/Internalize.h"
59 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
60 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
61 #include "llvm/Transforms/ObjCARC.h"
62 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
63
64 #include <numeric>
65
66 #if !defined(_MSC_VER) && !defined(__MINGW32__)
67 #include <unistd.h>
68 #else
69 #include <io.h>
70 #endif
71
72 using namespace llvm;
73
74 #define DEBUG_TYPE "thinlto"
75
namespace llvm {
// Command-line options used by this file. They are only declared here
// (`extern`); their definitions live in another translation unit.

// Flags -discard-value-names, defined in LTOCodeGenerator.cpp
extern cl::opt<bool> LTODiscardValueNames;
// Options controlling optimization-remark output.
extern cl::opt<std::string> RemarksFilename;
extern cl::opt<std::string> RemarksPasses;
extern cl::opt<bool> RemarksWithHotness;
extern cl::opt<Optional<uint64_t>, false, remarks::HotnessThresholdParser>
    RemarksHotnessThreshold;
extern cl::opt<std::string> RemarksFormat;
}
86
87 namespace {
88
// Default to using all available threads in the system, but using only one
// thread per core, as indicated by the usage of
// heavyweight_hardware_concurrency() below.
// The initial value 0 is the "not set by the user" default of -threads.
static cl::opt<int> ThreadCount("threads", cl::init(0));
93
94 // Simple helper to save temporary files for debug.
saveTempBitcode(const Module & TheModule,StringRef TempDir,unsigned count,StringRef Suffix)95 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
96 unsigned count, StringRef Suffix) {
97 if (TempDir.empty())
98 return;
99 // User asked to save temps, let dump the bitcode file after import.
100 std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
101 std::error_code EC;
102 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
103 if (EC)
104 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
105 " to save optimized bitcode\n");
106 WriteBitcodeToFile(TheModule, OS, /* ShouldPreserveUseListOrder */ true);
107 }
108
109 static const GlobalValueSummary *
getFirstDefinitionForLinker(const GlobalValueSummaryList & GVSummaryList)110 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
111 // If there is any strong definition anywhere, get it.
112 auto StrongDefForLinker = llvm::find_if(
113 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
114 auto Linkage = Summary->linkage();
115 return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
116 !GlobalValue::isWeakForLinker(Linkage);
117 });
118 if (StrongDefForLinker != GVSummaryList.end())
119 return StrongDefForLinker->get();
120 // Get the first *linker visible* definition for this global in the summary
121 // list.
122 auto FirstDefForLinker = llvm::find_if(
123 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
124 auto Linkage = Summary->linkage();
125 return !GlobalValue::isAvailableExternallyLinkage(Linkage);
126 });
127 // Extern templates can be emitted as available_externally.
128 if (FirstDefForLinker == GVSummaryList.end())
129 return nullptr;
130 return FirstDefForLinker->get();
131 }
132
133 // Populate map of GUID to the prevailing copy for any multiply defined
134 // symbols. Currently assume first copy is prevailing, or any strong
135 // definition. Can be refined with Linker information in the future.
computePrevailingCopies(const ModuleSummaryIndex & Index,DenseMap<GlobalValue::GUID,const GlobalValueSummary * > & PrevailingCopy)136 static void computePrevailingCopies(
137 const ModuleSummaryIndex &Index,
138 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
139 auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
140 return GVSummaryList.size() > 1;
141 };
142
143 for (auto &I : Index) {
144 if (HasMultipleCopies(I.second.SummaryList))
145 PrevailingCopy[I.first] =
146 getFirstDefinitionForLinker(I.second.SummaryList);
147 }
148 }
149
150 static StringMap<lto::InputFile *>
generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> & Modules)151 generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
152 StringMap<lto::InputFile *> ModuleMap;
153 for (auto &M : Modules) {
154 assert(ModuleMap.find(M->getName()) == ModuleMap.end() &&
155 "Expect unique Buffer Identifier");
156 ModuleMap[M->getName()] = M.get();
157 }
158 return ModuleMap;
159 }
160
promoteModule(Module & TheModule,const ModuleSummaryIndex & Index,bool ClearDSOLocalOnDeclarations)161 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index,
162 bool ClearDSOLocalOnDeclarations) {
163 if (renameModuleForThinLTO(TheModule, Index, ClearDSOLocalOnDeclarations))
164 report_fatal_error("renameModuleForThinLTO failed");
165 }
166
167 namespace {
168 class ThinLTODiagnosticInfo : public DiagnosticInfo {
169 const Twine &Msg;
170 public:
ThinLTODiagnosticInfo(const Twine & DiagMsg,DiagnosticSeverity Severity=DS_Error)171 ThinLTODiagnosticInfo(const Twine &DiagMsg,
172 DiagnosticSeverity Severity = DS_Error)
173 : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {}
print(DiagnosticPrinter & DP) const174 void print(DiagnosticPrinter &DP) const override { DP << Msg; }
175 };
176 }
177
178 /// Verify the module and strip broken debug info.
verifyLoadedModule(Module & TheModule)179 static void verifyLoadedModule(Module &TheModule) {
180 bool BrokenDebugInfo = false;
181 if (verifyModule(TheModule, &dbgs(), &BrokenDebugInfo))
182 report_fatal_error("Broken module found, compilation aborted!");
183 if (BrokenDebugInfo) {
184 TheModule.getContext().diagnose(ThinLTODiagnosticInfo(
185 "Invalid debug info found, debug info will be stripped", DS_Warning));
186 StripDebugInfo(TheModule);
187 }
188 }
189
loadModuleFromInput(lto::InputFile * Input,LLVMContext & Context,bool Lazy,bool IsImporting)190 static std::unique_ptr<Module> loadModuleFromInput(lto::InputFile *Input,
191 LLVMContext &Context,
192 bool Lazy,
193 bool IsImporting) {
194 auto &Mod = Input->getSingleBitcodeModule();
195 SMDiagnostic Err;
196 Expected<std::unique_ptr<Module>> ModuleOrErr =
197 Lazy ? Mod.getLazyModule(Context,
198 /* ShouldLazyLoadMetadata */ true, IsImporting)
199 : Mod.parseModule(Context);
200 if (!ModuleOrErr) {
201 handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
202 SMDiagnostic Err = SMDiagnostic(Mod.getModuleIdentifier(),
203 SourceMgr::DK_Error, EIB.message());
204 Err.print("ThinLTO", errs());
205 });
206 report_fatal_error("Can't load module, abort.");
207 }
208 if (!Lazy)
209 verifyLoadedModule(*ModuleOrErr.get());
210 return std::move(*ModuleOrErr);
211 }
212
213 static void
crossImportIntoModule(Module & TheModule,const ModuleSummaryIndex & Index,StringMap<lto::InputFile * > & ModuleMap,const FunctionImporter::ImportMapTy & ImportList,bool ClearDSOLocalOnDeclarations)214 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
215 StringMap<lto::InputFile *> &ModuleMap,
216 const FunctionImporter::ImportMapTy &ImportList,
217 bool ClearDSOLocalOnDeclarations) {
218 auto Loader = [&](StringRef Identifier) {
219 auto &Input = ModuleMap[Identifier];
220 return loadModuleFromInput(Input, TheModule.getContext(),
221 /*Lazy=*/true, /*IsImporting*/ true);
222 };
223
224 FunctionImporter Importer(Index, Loader, ClearDSOLocalOnDeclarations);
225 Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
226 if (!Result) {
227 handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
228 SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
229 SourceMgr::DK_Error, EIB.message());
230 Err.print("ThinLTO", errs());
231 });
232 report_fatal_error("importFunctions failed");
233 }
234 // Verify again after cross-importing.
235 verifyLoadedModule(TheModule);
236 }
237
optimizeModule(Module & TheModule,TargetMachine & TM,unsigned OptLevel,bool Freestanding,ModuleSummaryIndex * Index)238 static void optimizeModule(Module &TheModule, TargetMachine &TM,
239 unsigned OptLevel, bool Freestanding,
240 ModuleSummaryIndex *Index) {
241 // Populate the PassManager
242 PassManagerBuilder PMB;
243 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
244 if (Freestanding)
245 PMB.LibraryInfo->disableAllFunctions();
246 PMB.Inliner = createFunctionInliningPass();
247 // FIXME: should get it from the bitcode?
248 PMB.OptLevel = OptLevel;
249 PMB.LoopVectorize = true;
250 PMB.SLPVectorize = true;
251 // Already did this in verifyLoadedModule().
252 PMB.VerifyInput = false;
253 PMB.VerifyOutput = false;
254 PMB.ImportSummary = Index;
255
256 legacy::PassManager PM;
257
258 // Add the TTI (required to inform the vectorizer about register size for
259 // instance)
260 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
261
262 // Add optimizations
263 PMB.populateThinLTOPassManager(PM);
264
265 PM.run(TheModule);
266 }
267
optimizeModuleNewPM(Module & TheModule,TargetMachine & TM,unsigned OptLevel,bool Freestanding,bool DebugPassManager,ModuleSummaryIndex * Index)268 static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
269 unsigned OptLevel, bool Freestanding,
270 bool DebugPassManager,
271 ModuleSummaryIndex *Index) {
272 Optional<PGOOptions> PGOOpt;
273 LoopAnalysisManager LAM;
274 FunctionAnalysisManager FAM;
275 CGSCCAnalysisManager CGAM;
276 ModuleAnalysisManager MAM;
277
278 PassInstrumentationCallbacks PIC;
279 StandardInstrumentations SI(DebugPassManager);
280 SI.registerCallbacks(PIC, &FAM);
281 PipelineTuningOptions PTO;
282 PTO.LoopVectorization = true;
283 PTO.SLPVectorization = true;
284 PassBuilder PB(&TM, PTO, PGOOpt, &PIC);
285
286 std::unique_ptr<TargetLibraryInfoImpl> TLII(
287 new TargetLibraryInfoImpl(Triple(TM.getTargetTriple())));
288 if (Freestanding)
289 TLII->disableAllFunctions();
290 FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
291
292 AAManager AA = PB.buildDefaultAAPipeline();
293
294 // Register the AA manager first so that our version is the one used.
295 FAM.registerPass([&] { return std::move(AA); });
296
297 // Register all the basic analyses with the managers.
298 PB.registerModuleAnalyses(MAM);
299 PB.registerCGSCCAnalyses(CGAM);
300 PB.registerFunctionAnalyses(FAM);
301 PB.registerLoopAnalyses(LAM);
302 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
303
304 ModulePassManager MPM;
305
306 PassBuilder::OptimizationLevel OL;
307
308 switch (OptLevel) {
309 default:
310 llvm_unreachable("Invalid optimization level");
311 case 0:
312 OL = PassBuilder::OptimizationLevel::O0;
313 break;
314 case 1:
315 OL = PassBuilder::OptimizationLevel::O1;
316 break;
317 case 2:
318 OL = PassBuilder::OptimizationLevel::O2;
319 break;
320 case 3:
321 OL = PassBuilder::OptimizationLevel::O3;
322 break;
323 }
324
325 MPM.addPass(PB.buildThinLTODefaultPipeline(OL, Index));
326
327 MPM.run(TheModule, MAM);
328 }
329
330 static void
addUsedSymbolToPreservedGUID(const lto::InputFile & File,DenseSet<GlobalValue::GUID> & PreservedGUID)331 addUsedSymbolToPreservedGUID(const lto::InputFile &File,
332 DenseSet<GlobalValue::GUID> &PreservedGUID) {
333 for (const auto &Sym : File.symbols()) {
334 if (Sym.isUsed())
335 PreservedGUID.insert(GlobalValue::getGUID(Sym.getIRName()));
336 }
337 }
338
339 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
computeGUIDPreservedSymbols(const lto::InputFile & File,const StringSet<> & PreservedSymbols,const Triple & TheTriple,DenseSet<GlobalValue::GUID> & GUIDs)340 static void computeGUIDPreservedSymbols(const lto::InputFile &File,
341 const StringSet<> &PreservedSymbols,
342 const Triple &TheTriple,
343 DenseSet<GlobalValue::GUID> &GUIDs) {
344 // Iterate the symbols in the input file and if the input has preserved symbol
345 // compute the GUID for the symbol.
346 for (const auto &Sym : File.symbols()) {
347 if (PreservedSymbols.count(Sym.getName()) && !Sym.getIRName().empty())
348 GUIDs.insert(GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
349 Sym.getIRName(), GlobalValue::ExternalLinkage, "")));
350 }
351 }
352
353 static DenseSet<GlobalValue::GUID>
computeGUIDPreservedSymbols(const lto::InputFile & File,const StringSet<> & PreservedSymbols,const Triple & TheTriple)354 computeGUIDPreservedSymbols(const lto::InputFile &File,
355 const StringSet<> &PreservedSymbols,
356 const Triple &TheTriple) {
357 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
358 computeGUIDPreservedSymbols(File, PreservedSymbols, TheTriple,
359 GUIDPreservedSymbols);
360 return GUIDPreservedSymbols;
361 }
362
codegenModule(Module & TheModule,TargetMachine & TM)363 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
364 TargetMachine &TM) {
365 SmallVector<char, 128> OutputBuffer;
366
367 // CodeGen
368 {
369 raw_svector_ostream OS(OutputBuffer);
370 legacy::PassManager PM;
371
372 // If the bitcode files contain ARC code and were compiled with optimization,
373 // the ObjCARCContractPass must be run, so do it unconditionally here.
374 PM.add(createObjCARCContractPass());
375
376 // Setup the codegen now.
377 if (TM.addPassesToEmitFile(PM, OS, nullptr, CGFT_ObjectFile,
378 /* DisableVerify */ true))
379 report_fatal_error("Failed to setup codegen");
380
381 // Run codegen now. resulting binary is in OutputBuffer.
382 PM.run(TheModule);
383 }
384 return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
385 }
386
387 /// Manage caching for a single Module.
388 class ModuleCacheEntry {
389 SmallString<128> EntryPath;
390
391 public:
392 // Create a cache entry. This compute a unique hash for the Module considering
393 // the current list of export/import, and offer an interface to query to
394 // access the content in the cache.
ModuleCacheEntry(StringRef CachePath,const ModuleSummaryIndex & Index,StringRef ModuleID,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedGVSummaries,unsigned OptLevel,bool Freestanding,const TargetMachineBuilder & TMBuilder)395 ModuleCacheEntry(
396 StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
397 const FunctionImporter::ImportMapTy &ImportList,
398 const FunctionImporter::ExportSetTy &ExportList,
399 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
400 const GVSummaryMapTy &DefinedGVSummaries, unsigned OptLevel,
401 bool Freestanding, const TargetMachineBuilder &TMBuilder) {
402 if (CachePath.empty())
403 return;
404
405 if (!Index.modulePaths().count(ModuleID))
406 // The module does not have an entry, it can't have a hash at all
407 return;
408
409 if (all_of(Index.getModuleHash(ModuleID),
410 [](uint32_t V) { return V == 0; }))
411 // No hash entry, no caching!
412 return;
413
414 llvm::lto::Config Conf;
415 Conf.OptLevel = OptLevel;
416 Conf.Options = TMBuilder.Options;
417 Conf.CPU = TMBuilder.MCpu;
418 Conf.MAttrs.push_back(TMBuilder.MAttr);
419 Conf.RelocModel = TMBuilder.RelocModel;
420 Conf.CGOptLevel = TMBuilder.CGOptLevel;
421 Conf.Freestanding = Freestanding;
422 SmallString<40> Key;
423 computeLTOCacheKey(Key, Conf, Index, ModuleID, ImportList, ExportList,
424 ResolvedODR, DefinedGVSummaries);
425
426 // This choice of file name allows the cache to be pruned (see pruneCache()
427 // in include/llvm/Support/CachePruning.h).
428 sys::path::append(EntryPath, CachePath, "llvmcache-" + Key);
429 }
430
431 // Access the path to this entry in the cache.
getEntryPath()432 StringRef getEntryPath() { return EntryPath; }
433
434 // Try loading the buffer for this cache entry.
tryLoadingBuffer()435 ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
436 if (EntryPath.empty())
437 return std::error_code();
438 SmallString<64> ResultPath;
439 Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
440 Twine(EntryPath), sys::fs::OF_UpdateAtime, &ResultPath);
441 if (!FDOrErr)
442 return errorToErrorCode(FDOrErr.takeError());
443 ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getOpenFile(
444 *FDOrErr, EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false);
445 sys::fs::closeFile(*FDOrErr);
446 return MBOrErr;
447 }
448
449 // Cache the Produced object file
write(const MemoryBuffer & OutputBuffer)450 void write(const MemoryBuffer &OutputBuffer) {
451 if (EntryPath.empty())
452 return;
453
454 // Write to a temporary to avoid race condition
455 SmallString<128> TempFilename;
456 SmallString<128> CachePath(EntryPath);
457 llvm::sys::path::remove_filename(CachePath);
458 sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
459
460 if (auto Err = handleErrors(
461 llvm::writeFileAtomically(TempFilename, EntryPath,
462 OutputBuffer.getBuffer()),
463 [](const llvm::AtomicFileWriteError &E) {
464 std::string ErrorMsgBuffer;
465 llvm::raw_string_ostream S(ErrorMsgBuffer);
466 E.log(S);
467
468 if (E.Error ==
469 llvm::atomic_write_error::failed_to_create_uniq_file) {
470 errs() << "Error: " << ErrorMsgBuffer << "\n";
471 report_fatal_error("ThinLTO: Can't get a temporary file");
472 }
473 })) {
474 // FIXME
475 consumeError(std::move(Err));
476 }
477 }
478 };
479
480 static std::unique_ptr<MemoryBuffer>
ProcessThinLTOModule(Module & TheModule,ModuleSummaryIndex & Index,StringMap<lto::InputFile * > & ModuleMap,TargetMachine & TM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,const GVSummaryMapTy & DefinedGlobals,const ThinLTOCodeGenerator::CachingOptions & CacheOptions,bool DisableCodeGen,StringRef SaveTempsDir,bool Freestanding,unsigned OptLevel,unsigned count,bool UseNewPM,bool DebugPassManager)481 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
482 StringMap<lto::InputFile *> &ModuleMap, TargetMachine &TM,
483 const FunctionImporter::ImportMapTy &ImportList,
484 const FunctionImporter::ExportSetTy &ExportList,
485 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
486 const GVSummaryMapTy &DefinedGlobals,
487 const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
488 bool DisableCodeGen, StringRef SaveTempsDir,
489 bool Freestanding, unsigned OptLevel, unsigned count,
490 bool UseNewPM, bool DebugPassManager) {
491
492 // "Benchmark"-like optimization: single-source case
493 bool SingleModule = (ModuleMap.size() == 1);
494
495 // When linking an ELF shared object, dso_local should be dropped. We
496 // conservatively do this for -fpic.
497 bool ClearDSOLocalOnDeclarations =
498 TM.getTargetTriple().isOSBinFormatELF() &&
499 TM.getRelocationModel() != Reloc::Static &&
500 TheModule.getPIELevel() == PIELevel::Default;
501
502 if (!SingleModule) {
503 promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations);
504
505 // Apply summary-based prevailing-symbol resolution decisions.
506 thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
507
508 // Save temps: after promotion.
509 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
510 }
511
512 // Be friendly and don't nuke totally the module when the client didn't
513 // supply anything to preserve.
514 if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) {
515 // Apply summary-based internalization decisions.
516 thinLTOInternalizeModule(TheModule, DefinedGlobals);
517 }
518
519 // Save internalized bitcode
520 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
521
522 if (!SingleModule) {
523 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList,
524 ClearDSOLocalOnDeclarations);
525
526 // Save temps: after cross-module import.
527 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
528 }
529
530 if (UseNewPM)
531 optimizeModuleNewPM(TheModule, TM, OptLevel, Freestanding, DebugPassManager,
532 &Index);
533 else
534 optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index);
535
536 saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
537
538 if (DisableCodeGen) {
539 // Configured to stop before CodeGen, serialize the bitcode and return.
540 SmallVector<char, 128> OutputBuffer;
541 {
542 raw_svector_ostream OS(OutputBuffer);
543 ProfileSummaryInfo PSI(TheModule);
544 auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI);
545 WriteBitcodeToFile(TheModule, OS, true, &Index);
546 }
547 return std::make_unique<SmallVectorMemoryBuffer>(std::move(OutputBuffer));
548 }
549
550 return codegenModule(TheModule, TM);
551 }
552
553 /// Resolve prevailing symbols. Record resolutions in the \p ResolvedODR map
554 /// for caching, and in the \p Index for application during the ThinLTO
555 /// backends. This is needed for correctness for exported symbols (ensure
556 /// at least one copy kept) and a compile-time optimization (to drop duplicate
557 /// copies when possible).
resolvePrevailingInIndex(ModuleSummaryIndex & Index,StringMap<std::map<GlobalValue::GUID,GlobalValue::LinkageTypes>> & ResolvedODR,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,const DenseMap<GlobalValue::GUID,const GlobalValueSummary * > & PrevailingCopy)558 static void resolvePrevailingInIndex(
559 ModuleSummaryIndex &Index,
560 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
561 &ResolvedODR,
562 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
563 const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
564 &PrevailingCopy) {
565
566 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
567 const auto &Prevailing = PrevailingCopy.find(GUID);
568 // Not in map means that there was only one copy, which must be prevailing.
569 if (Prevailing == PrevailingCopy.end())
570 return true;
571 return Prevailing->second == S;
572 };
573
574 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
575 GlobalValue::GUID GUID,
576 GlobalValue::LinkageTypes NewLinkage) {
577 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
578 };
579
580 // TODO Conf.VisibilityScheme can be lto::Config::ELF for ELF.
581 lto::Config Conf;
582 thinLTOResolvePrevailingInIndex(Conf, Index, isPrevailing, recordNewLinkage,
583 GUIDPreservedSymbols);
584 }
585
586 // Initialize the TargetMachine builder for a given Triple
initTMBuilder(TargetMachineBuilder & TMBuilder,const Triple & TheTriple)587 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
588 const Triple &TheTriple) {
589 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
590 // FIXME this looks pretty terrible...
591 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
592 if (TheTriple.getArch() == llvm::Triple::x86_64)
593 TMBuilder.MCpu = "core2";
594 else if (TheTriple.getArch() == llvm::Triple::x86)
595 TMBuilder.MCpu = "yonah";
596 else if (TheTriple.getArch() == llvm::Triple::aarch64 ||
597 TheTriple.getArch() == llvm::Triple::aarch64_32)
598 TMBuilder.MCpu = "cyclone";
599 }
600 TMBuilder.TheTriple = std::move(TheTriple);
601 }
602
603 } // end anonymous namespace
604
addModule(StringRef Identifier,StringRef Data)605 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
606 MemoryBufferRef Buffer(Data, Identifier);
607
608 auto InputOrError = lto::InputFile::create(Buffer);
609 if (!InputOrError)
610 report_fatal_error("ThinLTO cannot create input file: " +
611 toString(InputOrError.takeError()));
612
613 auto TripleStr = (*InputOrError)->getTargetTriple();
614 Triple TheTriple(TripleStr);
615
616 if (Modules.empty())
617 initTMBuilder(TMBuilder, Triple(TheTriple));
618 else if (TMBuilder.TheTriple != TheTriple) {
619 if (!TMBuilder.TheTriple.isCompatibleWith(TheTriple))
620 report_fatal_error("ThinLTO modules with incompatible triples not "
621 "supported");
622 initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple)));
623 }
624
625 Modules.emplace_back(std::move(*InputOrError));
626 }
627
preserveSymbol(StringRef Name)628 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
629 PreservedSymbols.insert(Name);
630 }
631
crossReferenceSymbol(StringRef Name)632 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
633 // FIXME: At the moment, we don't take advantage of this extra information,
634 // we're conservatively considering cross-references as preserved.
635 // CrossReferencedSymbols.insert(Name);
636 PreservedSymbols.insert(Name);
637 }
638
639 // TargetMachine factory
create() const640 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
641 std::string ErrMsg;
642 const Target *TheTarget =
643 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
644 if (!TheTarget) {
645 report_fatal_error("Can't load target for this Triple: " + ErrMsg);
646 }
647
648 // Use MAttr as the default set of features.
649 SubtargetFeatures Features(MAttr);
650 Features.getDefaultSubtargetFeatures(TheTriple);
651 std::string FeatureStr = Features.getString();
652
653 std::unique_ptr<TargetMachine> TM(
654 TheTarget->createTargetMachine(TheTriple.str(), MCpu, FeatureStr, Options,
655 RelocModel, None, CGOptLevel));
656 assert(TM && "Cannot create target machine");
657
658 return TM;
659 }
660
661 /**
662 * Produce the combined summary index from all the bitcode files:
663 * "thin-link".
664 */
linkCombinedIndex()665 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
666 std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
667 std::make_unique<ModuleSummaryIndex>(/*HaveGVs=*/false);
668 uint64_t NextModuleId = 0;
669 for (auto &Mod : Modules) {
670 auto &M = Mod->getSingleBitcodeModule();
671 if (Error Err =
672 M.readSummary(*CombinedIndex, Mod->getName(), NextModuleId++)) {
673 // FIXME diagnose
674 logAllUnhandledErrors(
675 std::move(Err), errs(),
676 "error: can't create module summary index for buffer: ");
677 return nullptr;
678 }
679 }
680 return CombinedIndex;
681 }
682
683 namespace {
684 struct IsExported {
685 const StringMap<FunctionImporter::ExportSetTy> &ExportLists;
686 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols;
687
IsExported__anon4ce58e460f11::IsExported688 IsExported(const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
689 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols)
690 : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {}
691
operator ()__anon4ce58e460f11::IsExported692 bool operator()(StringRef ModuleIdentifier, ValueInfo VI) const {
693 const auto &ExportList = ExportLists.find(ModuleIdentifier);
694 return (ExportList != ExportLists.end() && ExportList->second.count(VI)) ||
695 GUIDPreservedSymbols.count(VI.getGUID());
696 }
697 };
698
699 struct IsPrevailing {
700 const DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy;
IsPrevailing__anon4ce58e460f11::IsPrevailing701 IsPrevailing(const DenseMap<GlobalValue::GUID, const GlobalValueSummary *>
702 &PrevailingCopy)
703 : PrevailingCopy(PrevailingCopy) {}
704
operator ()__anon4ce58e460f11::IsPrevailing705 bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const {
706 const auto &Prevailing = PrevailingCopy.find(GUID);
707 // Not in map means that there was only one copy, which must be prevailing.
708 if (Prevailing == PrevailingCopy.end())
709 return true;
710 return Prevailing->second == S;
711 };
712 };
713 } // namespace
714
computeDeadSymbolsInIndex(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols)715 static void computeDeadSymbolsInIndex(
716 ModuleSummaryIndex &Index,
717 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
718 // We have no symbols resolution available. And can't do any better now in the
719 // case where the prevailing symbol is in a native object. It can be refined
720 // with linker information in the future.
721 auto isPrevailing = [&](GlobalValue::GUID G) {
722 return PrevailingType::Unknown;
723 };
724 computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing,
725 /* ImportEnabled = */ true);
726 }
727
728 /**
729 * Perform promotion and renaming of exported internal functions.
730 * Index is updated to reflect linkage changes from weak resolution.
731 */
promote(Module & TheModule,ModuleSummaryIndex & Index,const lto::InputFile & File)732 void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
733 const lto::InputFile &File) {
734 auto ModuleCount = Index.modulePaths().size();
735 auto ModuleIdentifier = TheModule.getModuleIdentifier();
736
737 // Collect for each module the list of function it defines (GUID -> Summary).
738 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
739 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
740
741 // Convert the preserved symbols set from string to GUID
742 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
743 File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
744
745 // Add used symbol to the preserved symbols.
746 addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
747
748 // Compute "dead" symbols, we don't want to import/export these!
749 computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
750
751 // Generate import/export list
752 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
753 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
754 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
755 ExportLists);
756
757 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
758 computePrevailingCopies(Index, PrevailingCopy);
759
760 // Resolve prevailing symbols
761 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
762 resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
763 PrevailingCopy);
764
765 thinLTOResolvePrevailingInModule(
766 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
767
768 // Promote the exported values in the index, so that they are promoted
769 // in the module.
770 thinLTOInternalizeAndPromoteInIndex(
771 Index, IsExported(ExportLists, GUIDPreservedSymbols),
772 IsPrevailing(PrevailingCopy));
773
774 // FIXME Set ClearDSOLocalOnDeclarations.
775 promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false);
776 }
777
778 /**
779 * Perform cross-module importing for the module identified by ModuleIdentifier.
780 */
crossModuleImport(Module & TheModule,ModuleSummaryIndex & Index,const lto::InputFile & File)781 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
782 ModuleSummaryIndex &Index,
783 const lto::InputFile &File) {
784 auto ModuleMap = generateModuleMap(Modules);
785 auto ModuleCount = Index.modulePaths().size();
786
787 // Collect for each module the list of function it defines (GUID -> Summary).
788 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
789 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
790
791 // Convert the preserved symbols set from string to GUID
792 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
793 File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
794
795 addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
796
797 // Compute "dead" symbols, we don't want to import/export these!
798 computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
799
800 // Generate import/export list
801 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
802 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
803 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
804 ExportLists);
805 auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
806
807 // FIXME Set ClearDSOLocalOnDeclarations.
808 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList,
809 /*ClearDSOLocalOnDeclarations=*/false);
810 }
811
812 /**
813 * Compute the list of summaries needed for importing into module.
814 */
gatherImportedSummariesForModule(Module & TheModule,ModuleSummaryIndex & Index,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex,const lto::InputFile & File)815 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
816 Module &TheModule, ModuleSummaryIndex &Index,
817 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
818 const lto::InputFile &File) {
819 auto ModuleCount = Index.modulePaths().size();
820 auto ModuleIdentifier = TheModule.getModuleIdentifier();
821
822 // Collect for each module the list of function it defines (GUID -> Summary).
823 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
824 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
825
826 // Convert the preserved symbols set from string to GUID
827 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
828 File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
829
830 addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
831
832 // Compute "dead" symbols, we don't want to import/export these!
833 computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
834
835 // Generate import/export list
836 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
837 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
838 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
839 ExportLists);
840
841 llvm::gatherImportedSummariesForModule(
842 ModuleIdentifier, ModuleToDefinedGVSummaries,
843 ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
844 }
845
846 /**
847 * Emit the list of files needed for importing into module.
848 */
emitImports(Module & TheModule,StringRef OutputName,ModuleSummaryIndex & Index,const lto::InputFile & File)849 void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
850 ModuleSummaryIndex &Index,
851 const lto::InputFile &File) {
852 auto ModuleCount = Index.modulePaths().size();
853 auto ModuleIdentifier = TheModule.getModuleIdentifier();
854
855 // Collect for each module the list of function it defines (GUID -> Summary).
856 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
857 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
858
859 // Convert the preserved symbols set from string to GUID
860 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
861 File, PreservedSymbols, Triple(TheModule.getTargetTriple()));
862
863 addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
864
865 // Compute "dead" symbols, we don't want to import/export these!
866 computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
867
868 // Generate import/export list
869 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
870 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
871 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
872 ExportLists);
873
874 std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
875 llvm::gatherImportedSummariesForModule(
876 ModuleIdentifier, ModuleToDefinedGVSummaries,
877 ImportLists[ModuleIdentifier], ModuleToSummariesForIndex);
878
879 std::error_code EC;
880 if ((EC = EmitImportsFiles(ModuleIdentifier, OutputName,
881 ModuleToSummariesForIndex)))
882 report_fatal_error(Twine("Failed to open ") + OutputName +
883 " to save imports lists\n");
884 }
885
886 /**
887 * Perform internalization. Runs promote and internalization together.
888 * Index is updated to reflect linkage changes.
889 */
internalize(Module & TheModule,ModuleSummaryIndex & Index,const lto::InputFile & File)890 void ThinLTOCodeGenerator::internalize(Module &TheModule,
891 ModuleSummaryIndex &Index,
892 const lto::InputFile &File) {
893 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
894 auto ModuleCount = Index.modulePaths().size();
895 auto ModuleIdentifier = TheModule.getModuleIdentifier();
896
897 // Convert the preserved symbols set from string to GUID
898 auto GUIDPreservedSymbols =
899 computeGUIDPreservedSymbols(File, PreservedSymbols, TMBuilder.TheTriple);
900
901 addUsedSymbolToPreservedGUID(File, GUIDPreservedSymbols);
902
903 // Collect for each module the list of function it defines (GUID -> Summary).
904 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
905 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
906
907 // Compute "dead" symbols, we don't want to import/export these!
908 computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
909
910 // Generate import/export list
911 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
912 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
913 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
914 ExportLists);
915 auto &ExportList = ExportLists[ModuleIdentifier];
916
917 // Be friendly and don't nuke totally the module when the client didn't
918 // supply anything to preserve.
919 if (ExportList.empty() && GUIDPreservedSymbols.empty())
920 return;
921
922 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
923 computePrevailingCopies(Index, PrevailingCopy);
924
925 // Resolve prevailing symbols
926 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
927 resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
928 PrevailingCopy);
929
930 // Promote the exported values in the index, so that they are promoted
931 // in the module.
932 thinLTOInternalizeAndPromoteInIndex(
933 Index, IsExported(ExportLists, GUIDPreservedSymbols),
934 IsPrevailing(PrevailingCopy));
935
936 // FIXME Set ClearDSOLocalOnDeclarations.
937 promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false);
938
939 // Internalization
940 thinLTOResolvePrevailingInModule(
941 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
942
943 thinLTOInternalizeModule(TheModule,
944 ModuleToDefinedGVSummaries[ModuleIdentifier]);
945 }
946
947 /**
948 * Perform post-importing ThinLTO optimizations.
949 */
optimize(Module & TheModule)950 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
951 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
952
953 // Optimize now
954 optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding,
955 nullptr);
956 }
957
958 /// Write out the generated object file, either from CacheEntryPath or from
959 /// OutputBuffer, preferring hard-link when possible.
960 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
961 std::string
writeGeneratedObject(int count,StringRef CacheEntryPath,const MemoryBuffer & OutputBuffer)962 ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
963 const MemoryBuffer &OutputBuffer) {
964 auto ArchName = TMBuilder.TheTriple.getArchName();
965 SmallString<128> OutputPath(SavedObjectsDirectoryPath);
966 llvm::sys::path::append(OutputPath,
967 Twine(count) + "." + ArchName + ".thinlto.o");
968 OutputPath.c_str(); // Ensure the string is null terminated.
969 if (sys::fs::exists(OutputPath))
970 sys::fs::remove(OutputPath);
971
972 // We don't return a memory buffer to the linker, just a list of files.
973 if (!CacheEntryPath.empty()) {
974 // Cache is enabled, hard-link the entry (or copy if hard-link fails).
975 auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath);
976 if (!Err)
977 return std::string(OutputPath.str());
978 // Hard linking failed, try to copy.
979 Err = sys::fs::copy_file(CacheEntryPath, OutputPath);
980 if (!Err)
981 return std::string(OutputPath.str());
982 // Copy failed (could be because the CacheEntry was removed from the cache
983 // in the meantime by another process), fall back and try to write down the
984 // buffer to the output.
985 errs() << "remark: can't link or copy from cached entry '" << CacheEntryPath
986 << "' to '" << OutputPath << "'\n";
987 }
988 // No cache entry, just write out the buffer.
989 std::error_code Err;
990 raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None);
991 if (Err)
992 report_fatal_error("Can't open output '" + OutputPath + "'\n");
993 OS << OutputBuffer.getBuffer();
994 return std::string(OutputPath.str());
995 }
996
997 // Main entry point for the ThinLTO processing
run()998 void ThinLTOCodeGenerator::run() {
999 // Prepare the resulting object vector
1000 assert(ProducedBinaries.empty() && "The generator should not be reused");
1001 if (SavedObjectsDirectoryPath.empty())
1002 ProducedBinaries.resize(Modules.size());
1003 else {
1004 sys::fs::create_directories(SavedObjectsDirectoryPath);
1005 bool IsDir;
1006 sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
1007 if (!IsDir)
1008 report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
1009 ProducedBinaryFiles.resize(Modules.size());
1010 }
1011
1012 if (CodeGenOnly) {
1013 // Perform only parallel codegen and return.
1014 ThreadPool Pool;
1015 int count = 0;
1016 for (auto &Mod : Modules) {
1017 Pool.async([&](int count) {
1018 LLVMContext Context;
1019 Context.setDiscardValueNames(LTODiscardValueNames);
1020
1021 // Parse module now
1022 auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
1023 /*IsImporting*/ false);
1024
1025 // CodeGen
1026 auto OutputBuffer = codegenModule(*TheModule, *TMBuilder.create());
1027 if (SavedObjectsDirectoryPath.empty())
1028 ProducedBinaries[count] = std::move(OutputBuffer);
1029 else
1030 ProducedBinaryFiles[count] =
1031 writeGeneratedObject(count, "", *OutputBuffer);
1032 }, count++);
1033 }
1034
1035 return;
1036 }
1037
1038 // Sequential linking phase
1039 auto Index = linkCombinedIndex();
1040
1041 // Save temps: index.
1042 if (!SaveTempsDir.empty()) {
1043 auto SaveTempPath = SaveTempsDir + "index.bc";
1044 std::error_code EC;
1045 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::OF_None);
1046 if (EC)
1047 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
1048 " to save optimized bitcode\n");
1049 WriteIndexToFile(*Index, OS);
1050 }
1051
1052
1053 // Prepare the module map.
1054 auto ModuleMap = generateModuleMap(Modules);
1055 auto ModuleCount = Modules.size();
1056
1057 // Collect for each module the list of function it defines (GUID -> Summary).
1058 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
1059 Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
1060
1061 // Convert the preserved symbols set from string to GUID, this is needed for
1062 // computing the caching hash and the internalization.
1063 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1064 for (const auto &M : Modules)
1065 computeGUIDPreservedSymbols(*M, PreservedSymbols, TMBuilder.TheTriple,
1066 GUIDPreservedSymbols);
1067
1068 // Add used symbol from inputs to the preserved symbols.
1069 for (const auto &M : Modules)
1070 addUsedSymbolToPreservedGUID(*M, GUIDPreservedSymbols);
1071
1072 // Compute "dead" symbols, we don't want to import/export these!
1073 computeDeadSymbolsInIndex(*Index, GUIDPreservedSymbols);
1074
1075 // Synthesize entry counts for functions in the combined index.
1076 computeSyntheticCounts(*Index);
1077
1078 // Currently there is no support for enabling whole program visibility via a
1079 // linker option in the old LTO API, but this call allows it to be specified
1080 // via the internal option. Must be done before WPD below.
1081 updateVCallVisibilityInIndex(*Index,
1082 /* WholeProgramVisibilityEnabledInLTO */ false,
1083 // FIXME: This needs linker information via a
1084 // TBD new interface.
1085 /* DynamicExportSymbols */ {});
1086
1087 // Perform index-based WPD. This will return immediately if there are
1088 // no index entries in the typeIdMetadata map (e.g. if we are instead
1089 // performing IR-based WPD in hybrid regular/thin LTO mode).
1090 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
1091 std::set<GlobalValue::GUID> ExportedGUIDs;
1092 runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap);
1093 for (auto GUID : ExportedGUIDs)
1094 GUIDPreservedSymbols.insert(GUID);
1095
1096 // Collect the import/export lists for all modules from the call-graph in the
1097 // combined index.
1098 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
1099 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
1100 ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
1101 ExportLists);
1102
1103 // We use a std::map here to be able to have a defined ordering when
1104 // producing a hash for the cache entry.
1105 // FIXME: we should be able to compute the caching hash for the entry based
1106 // on the index, and nuke this map.
1107 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
1108
1109 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
1110 computePrevailingCopies(*Index, PrevailingCopy);
1111
1112 // Resolve prevailing symbols, this has to be computed early because it
1113 // impacts the caching.
1114 resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols,
1115 PrevailingCopy);
1116
1117 // Use global summary-based analysis to identify symbols that can be
1118 // internalized (because they aren't exported or preserved as per callback).
1119 // Changes are made in the index, consumed in the ThinLTO backends.
1120 updateIndexWPDForExports(*Index,
1121 IsExported(ExportLists, GUIDPreservedSymbols),
1122 LocalWPDTargetsMap);
1123 thinLTOInternalizeAndPromoteInIndex(
1124 *Index, IsExported(ExportLists, GUIDPreservedSymbols),
1125 IsPrevailing(PrevailingCopy));
1126
1127 // Make sure that every module has an entry in the ExportLists, ImportList,
1128 // GVSummary and ResolvedODR maps to enable threaded access to these maps
1129 // below.
1130 for (auto &Module : Modules) {
1131 auto ModuleIdentifier = Module->getName();
1132 ExportLists[ModuleIdentifier];
1133 ImportLists[ModuleIdentifier];
1134 ResolvedODR[ModuleIdentifier];
1135 ModuleToDefinedGVSummaries[ModuleIdentifier];
1136 }
1137
1138 std::vector<BitcodeModule *> ModulesVec;
1139 ModulesVec.reserve(Modules.size());
1140 for (auto &Mod : Modules)
1141 ModulesVec.push_back(&Mod->getSingleBitcodeModule());
1142 std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
1143
1144 // Parallel optimizer + codegen
1145 {
1146 ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount));
1147 for (auto IndexCount : ModulesOrdering) {
1148 auto &Mod = Modules[IndexCount];
1149 Pool.async([&](int count) {
1150 auto ModuleIdentifier = Mod->getName();
1151 auto &ExportList = ExportLists[ModuleIdentifier];
1152
1153 auto &DefinedGVSummaries = ModuleToDefinedGVSummaries[ModuleIdentifier];
1154
1155 // The module may be cached, this helps handling it.
1156 ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
1157 ImportLists[ModuleIdentifier], ExportList,
1158 ResolvedODR[ModuleIdentifier],
1159 DefinedGVSummaries, OptLevel, Freestanding,
1160 TMBuilder);
1161 auto CacheEntryPath = CacheEntry.getEntryPath();
1162
1163 {
1164 auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
1165 LLVM_DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss")
1166 << " '" << CacheEntryPath << "' for buffer "
1167 << count << " " << ModuleIdentifier << "\n");
1168
1169 if (ErrOrBuffer) {
1170 // Cache Hit!
1171 if (SavedObjectsDirectoryPath.empty())
1172 ProducedBinaries[count] = std::move(ErrOrBuffer.get());
1173 else
1174 ProducedBinaryFiles[count] = writeGeneratedObject(
1175 count, CacheEntryPath, *ErrOrBuffer.get());
1176 return;
1177 }
1178 }
1179
1180 LLVMContext Context;
1181 Context.setDiscardValueNames(LTODiscardValueNames);
1182 Context.enableDebugTypeODRUniquing();
1183 auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
1184 Context, RemarksFilename, RemarksPasses, RemarksFormat,
1185 RemarksWithHotness, RemarksHotnessThreshold, count);
1186 if (!DiagFileOrErr) {
1187 errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
1188 report_fatal_error("ThinLTO: Can't get an output file for the "
1189 "remarks");
1190 }
1191
1192 // Parse module now
1193 auto TheModule = loadModuleFromInput(Mod.get(), Context, false,
1194 /*IsImporting*/ false);
1195
1196 // Save temps: original file.
1197 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
1198
1199 auto &ImportList = ImportLists[ModuleIdentifier];
1200 // Run the main process now, and generates a binary
1201 auto OutputBuffer = ProcessThinLTOModule(
1202 *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
1203 ExportList, GUIDPreservedSymbols,
1204 ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
1205 DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count,
1206 UseNewPM, DebugPassManager);
1207
1208 // Commit to the cache (if enabled)
1209 CacheEntry.write(*OutputBuffer);
1210
1211 if (SavedObjectsDirectoryPath.empty()) {
1212 // We need to generated a memory buffer for the linker.
1213 if (!CacheEntryPath.empty()) {
1214 // When cache is enabled, reload from the cache if possible.
1215 // Releasing the buffer from the heap and reloading it from the
1216 // cache file with mmap helps us to lower memory pressure.
1217 // The freed memory can be used for the next input file.
1218 // The final binary link will read from the VFS cache (hopefully!)
1219 // or from disk (if the memory pressure was too high).
1220 auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
1221 if (auto EC = ReloadedBufferOrErr.getError()) {
1222 // On error, keep the preexisting buffer and print a diagnostic.
1223 errs() << "remark: can't reload cached file '" << CacheEntryPath
1224 << "': " << EC.message() << "\n";
1225 } else {
1226 OutputBuffer = std::move(*ReloadedBufferOrErr);
1227 }
1228 }
1229 ProducedBinaries[count] = std::move(OutputBuffer);
1230 return;
1231 }
1232 ProducedBinaryFiles[count] = writeGeneratedObject(
1233 count, CacheEntryPath, *OutputBuffer);
1234 }, IndexCount);
1235 }
1236 }
1237
1238 pruneCache(CacheOptions.Path, CacheOptions.Policy);
1239
1240 // If statistics were requested, print them out now.
1241 if (llvm::AreStatisticsEnabled())
1242 llvm::PrintStatistics();
1243 reportAndResetTimings();
1244 }
1245