1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the Thin Link Time Optimization library. This library is 11 // intended to be used by linker to optimize code at link time. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/LTO/ThinLTOCodeGenerator.h" 16 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/TargetLibraryInfo.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/Bitcode/ReaderWriter.h" 22 #include "llvm/Bitcode/BitcodeWriterPass.h" 23 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" 24 #include "llvm/IR/LLVMContext.h" 25 #include "llvm/IR/DiagnosticPrinter.h" 26 #include "llvm/IR/LegacyPassManager.h" 27 #include "llvm/IR/Mangler.h" 28 #include "llvm/IRReader/IRReader.h" 29 #include "llvm/Linker/Linker.h" 30 #include "llvm/MC/SubtargetFeature.h" 31 #include "llvm/Object/FunctionIndexObjectFile.h" 32 #include "llvm/Support/SourceMgr.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/ThreadPool.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Transforms/IPO.h" 37 #include "llvm/Transforms/IPO/FunctionImport.h" 38 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 39 #include "llvm/Transforms/ObjCARC.h" 40 #include "llvm/Transforms/Utils/FunctionImportUtils.h" 41 42 using namespace llvm; 43 44 namespace llvm { 45 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp 46 extern cl::opt<bool> LTODiscardValueNames; 47 } 48 49 namespace { 50 51 static cl::opt<int> ThreadCount("threads", 52 cl::init(std::thread::hardware_concurrency())); 53 54 static void diagnosticHandler(const DiagnosticInfo &DI) { 55 DiagnosticPrinterRawOStream DP(errs()); 56 DI.print(DP); 57 errs() << '\n'; 58 } 59 60 // Simple helper to load a module from bitcode 61 static std::unique_ptr<Module> 62 loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, 63 bool Lazy) { 64 SMDiagnostic Err; 65 ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr); 66 if (Lazy) { 67 ModuleOrErr = 68 getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context, 69 /* ShouldLazyLoadMetadata */ Lazy); 70 } else { 71 ModuleOrErr = parseBitcodeFile(Buffer, Context); 72 } 73 if (std::error_code EC = ModuleOrErr.getError()) { 74 Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error, 75 EC.message()); 76 Err.print("ThinLTO", errs()); 77 report_fatal_error("Can't load module, abort."); 78 } 79 return std::move(ModuleOrErr.get()); 80 } 81 82 // Simple helper to save temporary files for debug. 83 static void saveTempBitcode(const Module &TheModule, StringRef TempDir, 84 unsigned count, StringRef Suffix) { 85 if (TempDir.empty()) 86 return; 87 // User asked to save temps, let dump the bitcode file after import. 88 auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix; 89 std::error_code EC; 90 raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None); 91 if (EC) 92 report_fatal_error(Twine("Failed to open ") + SaveTempPath + 93 " to save optimized bitcode\n"); 94 WriteBitcodeToFile(&TheModule, OS, true, false); 95 } 96 97 static StringMap<MemoryBufferRef> 98 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) { 99 StringMap<MemoryBufferRef> ModuleMap; 100 for (auto &ModuleBuffer : Modules) { 101 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) == 102 ModuleMap.end() && 103 "Expect unique Buffer Identifier"); 104 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer; 105 } 106 return ModuleMap; 107 } 108 109 /// Provide a "loader" for the FunctionImporter to access function from other 110 /// modules. 111 class ModuleLoader { 112 /// The context that will be used for importing. 113 LLVMContext &Context; 114 115 /// Map from Module identifier to MemoryBuffer. Used by clients like the 116 /// FunctionImported to request loading a Module. 117 StringMap<MemoryBufferRef> &ModuleMap; 118 119 public: 120 ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap) 121 : Context(Context), ModuleMap(ModuleMap) {} 122 123 /// Load a module on demand. 124 std::unique_ptr<Module> operator()(StringRef Identifier) { 125 return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true); 126 } 127 }; 128 129 static void promoteModule(Module &TheModule, const FunctionInfoIndex &Index) { 130 if (renameModuleForThinLTO(TheModule, Index)) 131 report_fatal_error("renameModuleForThinLTO failed"); 132 } 133 134 static void crossImportIntoModule(Module &TheModule, 135 const FunctionInfoIndex &Index, 136 StringMap<MemoryBufferRef> &ModuleMap) { 137 ModuleLoader Loader(TheModule.getContext(), ModuleMap); 138 FunctionImporter Importer(Index, Loader); 139 Importer.importFunctions(TheModule); 140 } 141 142 static void optimizeModule(Module &TheModule, TargetMachine &TM) { 143 // Populate the PassManager 144 PassManagerBuilder PMB; 145 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); 146 PMB.Inliner = createFunctionInliningPass(); 147 // FIXME: should get it from the bitcode? 148 PMB.OptLevel = 3; 149 PMB.LoopVectorize = true; 150 PMB.SLPVectorize = true; 151 PMB.VerifyInput = true; 152 PMB.VerifyOutput = false; 153 154 legacy::PassManager PM; 155 156 // Add the TTI (required to inform the vectorizer about register size for 157 // instance) 158 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis())); 159 160 // Add optimizations 161 PMB.populateThinLTOPassManager(PM); 162 PM.add(createObjCARCContractPass()); 163 164 PM.run(TheModule); 165 } 166 167 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule, 168 TargetMachine &TM) { 169 SmallVector<char, 128> OutputBuffer; 170 171 // CodeGen 172 { 173 raw_svector_ostream OS(OutputBuffer); 174 legacy::PassManager PM; 175 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile, 176 /* DisableVerify */ true)) 177 report_fatal_error("Failed to setup codegen"); 178 179 // Run codegen now. resulting binary is in OutputBuffer. 180 PM.run(TheModule); 181 } 182 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer)); 183 } 184 185 static std::unique_ptr<MemoryBuffer> 186 ProcessThinLTOModule(Module &TheModule, const FunctionInfoIndex &Index, 187 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM, 188 ThinLTOCodeGenerator::CachingOptions CacheOptions, 189 StringRef SaveTempsDir, unsigned count) { 190 191 // Save temps: after IPO. 192 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.IPO.bc"); 193 194 // "Benchmark"-like optimization: single-source case 195 bool SingleModule = (ModuleMap.size() == 1); 196 197 if (!SingleModule) { 198 promoteModule(TheModule, Index); 199 200 // Save temps: after promotion. 201 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.promoted.bc"); 202 203 crossImportIntoModule(TheModule, Index, ModuleMap); 204 205 // Save temps: after cross-module import. 206 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); 207 } 208 209 optimizeModule(TheModule, TM); 210 211 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.opt.bc"); 212 213 return codegenModule(TheModule, TM); 214 } 215 216 // Initialize the TargetMachine builder for a given Triple 217 static void initTMBuilder(TargetMachineBuilder &TMBuilder, 218 const Triple &TheTriple) { 219 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator). 220 // FIXME this looks pretty terrible... 221 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) { 222 if (TheTriple.getArch() == llvm::Triple::x86_64) 223 TMBuilder.MCpu = "core2"; 224 else if (TheTriple.getArch() == llvm::Triple::x86) 225 TMBuilder.MCpu = "yonah"; 226 else if (TheTriple.getArch() == llvm::Triple::aarch64) 227 TMBuilder.MCpu = "cyclone"; 228 } 229 TMBuilder.TheTriple = std::move(TheTriple); 230 } 231 232 } // end anonymous namespace 233 234 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { 235 MemoryBufferRef Buffer(Data, Identifier); 236 if (Modules.empty()) { 237 // First module added, so initialize the triple and some options 238 LLVMContext Context; 239 Triple TheTriple(getBitcodeTargetTriple(Buffer, Context)); 240 initTMBuilder(TMBuilder, Triple(TheTriple)); 241 } 242 #ifndef NDEBUG 243 else { 244 LLVMContext Context; 245 assert(TMBuilder.TheTriple.str() == 246 getBitcodeTargetTriple(Buffer, Context) && 247 "ThinLTO modules with different triple not supported"); 248 } 249 #endif 250 Modules.push_back(Buffer); 251 } 252 253 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) { 254 PreservedSymbols.insert(Name); 255 } 256 257 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) { 258 CrossReferencedSymbols.insert(Name); 259 } 260 261 // TargetMachine factory 262 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const { 263 std::string ErrMsg; 264 const Target *TheTarget = 265 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg); 266 if (!TheTarget) { 267 report_fatal_error("Can't load target for this Triple: " + ErrMsg); 268 } 269 270 // Use MAttr as the default set of features. 271 SubtargetFeatures Features(MAttr); 272 Features.getDefaultSubtargetFeatures(TheTriple); 273 std::string FeatureStr = Features.getString(); 274 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine( 275 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel, 276 CodeModel::Default, CGOptLevel)); 277 } 278 279 /** 280 * Produce the combined function index from all the bitcode files: 281 * "thin-link". 282 */ 283 std::unique_ptr<FunctionInfoIndex> ThinLTOCodeGenerator::linkCombinedIndex() { 284 std::unique_ptr<FunctionInfoIndex> CombinedIndex; 285 uint64_t NextModuleId = 0; 286 for (auto &ModuleBuffer : Modules) { 287 ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr = 288 object::FunctionIndexObjectFile::create(ModuleBuffer, diagnosticHandler, 289 false); 290 if (std::error_code EC = ObjOrErr.getError()) { 291 // FIXME diagnose 292 errs() << "error: can't create FunctionIndexObjectFile for buffer: " 293 << EC.message() << "\n"; 294 return nullptr; 295 } 296 auto Index = (*ObjOrErr)->takeIndex(); 297 if (CombinedIndex) { 298 CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); 299 } else { 300 CombinedIndex = std::move(Index); 301 } 302 } 303 return CombinedIndex; 304 } 305 306 /** 307 * Perform promotion and renaming of exported internal functions. 308 */ 309 void ThinLTOCodeGenerator::promote(Module &TheModule, 310 FunctionInfoIndex &Index) { 311 promoteModule(TheModule, Index); 312 } 313 314 /** 315 * Perform cross-module importing for the module identified by ModuleIdentifier. 316 */ 317 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, 318 FunctionInfoIndex &Index) { 319 auto ModuleMap = generateModuleMap(Modules); 320 crossImportIntoModule(TheModule, Index, ModuleMap); 321 } 322 323 /** 324 * Perform post-importing ThinLTO optimizations. 325 */ 326 void ThinLTOCodeGenerator::optimize(Module &TheModule) { 327 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); 328 optimizeModule(TheModule, *TMBuilder.create()); 329 } 330 331 /** 332 * Perform ThinLTO CodeGen. 333 */ 334 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) { 335 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); 336 return codegenModule(TheModule, *TMBuilder.create()); 337 } 338 339 // Main entry point for the ThinLTO processing 340 void ThinLTOCodeGenerator::run() { 341 // Sequential linking phase 342 auto Index = linkCombinedIndex(); 343 344 // Save temps: index. 345 if (!SaveTempsDir.empty()) { 346 auto SaveTempPath = SaveTempsDir + "index.bc"; 347 std::error_code EC; 348 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None); 349 if (EC) 350 report_fatal_error(Twine("Failed to open ") + SaveTempPath + 351 " to save optimized bitcode\n"); 352 WriteIndexToFile(*Index, OS); 353 } 354 355 // Prepare the resulting object vector 356 assert(ProducedBinaries.empty() && "The generator should not be reused"); 357 ProducedBinaries.resize(Modules.size()); 358 359 // Prepare the module map. 360 auto ModuleMap = generateModuleMap(Modules); 361 362 // Parallel optimizer + codegen 363 { 364 ThreadPool Pool(ThreadCount); 365 int count = 0; 366 for (auto &ModuleBuffer : Modules) { 367 Pool.async([&](int count) { 368 LLVMContext Context; 369 Context.setDiscardValueNames(LTODiscardValueNames); 370 371 // Parse module now 372 auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false); 373 374 // Save temps: original file. 375 if (!SaveTempsDir.empty()) { 376 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc"); 377 } 378 379 ProducedBinaries[count] = ProcessThinLTOModule( 380 *TheModule, *Index, ModuleMap, *TMBuilder.create(), CacheOptions, 381 SaveTempsDir, count); 382 }, count); 383 count++; 384 } 385 } 386 387 // If statistics were requested, print them out now. 388 if (llvm::AreStatisticsEnabled()) 389 llvm::PrintStatistics(); 390 } 391