1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file composes the individual LLVM-IR passes provided by Polly to a 11 // functional polyhedral optimizer. The polyhedral optimizer is automatically 12 // made available to LLVM based compilers by loading the Polly shared library 13 // into such a compiler. 14 // 15 // The Polly optimizer is made available by executing a static constructor that 16 // registers the individual Polly passes in the LLVM pass manager builder. The 17 // passes are registered such that the default behaviour of the compiler is not 18 // changed, but that the flag '-polly' provided at optimization level '-O3' 19 // enables additional polyhedral optimizations. 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/RegisterPasses.h" 23 #include "polly/Canonicalization.h" 24 #include "polly/CodeGen/CodeGeneration.h" 25 #include "polly/CodeGen/CodegenCleanup.h" 26 #include "polly/CodeGen/PPCGCodeGeneration.h" 27 #include "polly/DeLICM.h" 28 #include "polly/DependenceInfo.h" 29 #include "polly/FlattenSchedule.h" 30 #include "polly/LinkAllPasses.h" 31 #include "polly/Options.h" 32 #include "polly/PolyhedralInfo.h" 33 #include "polly/ScopDetection.h" 34 #include "polly/ScopInfo.h" 35 #include "polly/Simplify.h" 36 #include "polly/Support/DumpModulePass.h" 37 #include "llvm/Analysis/CFGPrinter.h" 38 #include "llvm/IR/LegacyPassManager.h" 39 #include "llvm/Support/TargetSelect.h" 40 #include "llvm/Transforms/IPO.h" 41 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 42 #include "llvm/Transforms/Scalar.h" 43 #include "llvm/Transforms/Vectorize.h" 44 45 using namespace llvm; 46 using namespace polly; 47 48 cl::OptionCategory PollyCategory("Polly Options", 49 "Configure the polly loop optimizer"); 50 51 static cl::opt<bool> 52 PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"), 53 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 54 55 static cl::opt<bool> PollyDetectOnly( 56 "polly-only-scop-detection", 57 cl::desc("Only run scop detection, but no other optimizations"), 58 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 59 60 enum PassPositionChoice { 61 POSITION_EARLY, 62 POSITION_AFTER_LOOPOPT, 63 POSITION_BEFORE_VECTORIZER 64 }; 65 66 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL }; 67 68 static cl::opt<PassPositionChoice> PassPosition( 69 "polly-position", cl::desc("Where to run polly in the pass pipeline"), 70 cl::values( 71 clEnumValN(POSITION_EARLY, "early", "Before everything"), 72 clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt", 73 "After the loop optimizer (but within the inline cycle)"), 74 clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer", 75 "Right before the vectorizer")), 76 cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore, 77 cl::cat(PollyCategory)); 78 79 static cl::opt<OptimizerChoice> 80 Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"), 81 cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"), 82 clEnumValN(OPTIMIZER_ISL, "isl", 83 "The isl scheduling optimizer")), 84 cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore, 85 cl::cat(PollyCategory)); 86 87 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE }; 88 static cl::opt<CodeGenChoice> CodeGeneration( 89 "polly-code-generation", cl::desc("How much code-generation to perform"), 90 cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"), 91 clEnumValN(CODEGEN_AST, "ast", "Only AST generation"), 92 clEnumValN(CODEGEN_NONE, "none", "No code generation")), 93 cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); 94 95 enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID }; 96 static cl::opt<TargetChoice> 97 Target("polly-target", cl::desc("The hardware to target"), 98 cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") 99 #ifdef GPU_CODEGEN 100 , 101 clEnumValN(TARGET_GPU, "gpu", "generate GPU code"), 102 clEnumValN(TARGET_HYBRID, "hybrid", 103 "generate GPU code (preferably) or CPU code") 104 #endif 105 ), 106 cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); 107 108 #ifdef GPU_CODEGEN 109 static cl::opt<GPURuntime> GPURuntimeChoice( 110 "polly-gpu-runtime", cl::desc("The GPU Runtime API to target"), 111 cl::values(clEnumValN(GPURuntime::CUDA, "libcudart", 112 "use the CUDA Runtime API"), 113 clEnumValN(GPURuntime::OpenCL, "libopencl", 114 "use the OpenCL Runtime API")), 115 cl::init(GPURuntime::CUDA), cl::ZeroOrMore, cl::cat(PollyCategory)); 116 117 static cl::opt<GPUArch> 118 GPUArchChoice("polly-gpu-arch", cl::desc("The GPU Architecture to target"), 119 cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64", 120 "target NVIDIA 64-bit architecture")), 121 cl::init(GPUArch::NVPTX64), cl::ZeroOrMore, 122 cl::cat(PollyCategory)); 123 #endif 124 125 VectorizerChoice polly::PollyVectorizerChoice; 126 static cl::opt<polly::VectorizerChoice, true> Vectorizer( 127 "polly-vectorizer", cl::desc("Select the vectorization strategy"), 128 cl::values( 129 clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"), 130 clEnumValN(polly::VECTORIZER_POLLY, "polly", 131 "Polly internal vectorizer"), 132 clEnumValN( 133 polly::VECTORIZER_STRIPMINE, "stripmine", 134 "Strip-mine outer loops for the loop-vectorizer to trigger")), 135 cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE), 136 cl::ZeroOrMore, cl::cat(PollyCategory)); 137 138 static cl::opt<bool> ImportJScop( 139 "polly-import", 140 cl::desc("Import the polyhedral description of the detected Scops"), 141 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 142 143 static cl::opt<bool> ExportJScop( 144 "polly-export", 145 cl::desc("Export the polyhedral description of the detected Scops"), 146 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 147 148 static cl::opt<bool> DeadCodeElim("polly-run-dce", 149 cl::desc("Run the dead code elimination"), 150 cl::Hidden, cl::init(false), cl::ZeroOrMore, 151 cl::cat(PollyCategory)); 152 153 static cl::opt<bool> PollyViewer( 154 "polly-show", 155 cl::desc("Highlight the code regions that will be optimized in a " 156 "(CFG BBs and LLVM-IR instructions)"), 157 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 158 159 static cl::opt<bool> PollyOnlyViewer( 160 "polly-show-only", 161 cl::desc("Highlight the code regions that will be optimized in " 162 "a (CFG only BBs)"), 163 cl::init(false), cl::cat(PollyCategory)); 164 165 static cl::opt<bool> 166 PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"), 167 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"), 168 cl::init(false), cl::cat(PollyCategory)); 169 170 static cl::opt<bool> PollyOnlyPrinter( 171 "polly-dot-only", 172 cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden, 173 cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"), 174 cl::init(false), cl::cat(PollyCategory)); 175 176 static cl::opt<bool> 177 CFGPrinter("polly-view-cfg", 178 cl::desc("Show the Polly CFG right after code generation"), 179 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 180 181 static cl::opt<bool> 182 EnablePolyhedralInfo("polly-enable-polyhedralinfo", 183 cl::desc("Enable polyhedral interface of Polly"), 184 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 185 186 static cl::opt<bool> 187 DumpBefore("polly-dump-before", 188 cl::desc("Dump module before Polly transformations into a file " 189 "suffixed with \"-before\""), 190 cl::init(false), cl::cat(PollyCategory)); 191 192 static cl::list<std::string> DumpBeforeFile( 193 "polly-dump-before-file", 194 cl::desc("Dump module before Polly transformations to the given file"), 195 cl::cat(PollyCategory)); 196 197 static cl::opt<bool> 198 DumpAfter("polly-dump-after", 199 cl::desc("Dump module after Polly transformations into a file " 200 "suffixed with \"-after\""), 201 cl::init(false), cl::cat(PollyCategory)); 202 203 static cl::list<std::string> DumpAfterFile( 204 "polly-dump-after-file", 205 cl::desc("Dump module after Polly transformations to the given file"), 206 cl::ZeroOrMore, cl::cat(PollyCategory)); 207 208 static cl::opt<bool> 209 EnableDeLICM("polly-enable-delicm", 210 cl::desc("Eliminate scalar loop carried dependences"), 211 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 212 213 static cl::opt<bool> 214 EnableSimplify("polly-enable-simplify", 215 cl::desc("Simplify SCoP after optimizations"), 216 cl::init(false), cl::cat(PollyCategory)); 217 218 static cl::opt<bool> EnablePruneUnprofitable( 219 "polly-enable-prune-unprofitable", 220 cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden, 221 cl::init(true), cl::cat(PollyCategory)); 222 223 namespace polly { 224 void initializePollyPasses(PassRegistry &Registry) { 225 initializeCodeGenerationPass(Registry); 226 227 #ifdef GPU_CODEGEN 228 initializePPCGCodeGenerationPass(Registry); 229 LLVMInitializeNVPTXTarget(); 230 LLVMInitializeNVPTXTargetInfo(); 231 LLVMInitializeNVPTXTargetMC(); 232 LLVMInitializeNVPTXAsmPrinter(); 233 #endif 234 initializeCodePreparationPass(Registry); 235 initializeDeadCodeElimPass(Registry); 236 initializeDependenceInfoPass(Registry); 237 initializeDependenceInfoWrapperPassPass(Registry); 238 initializeJSONExporterPass(Registry); 239 initializeJSONImporterPass(Registry); 240 initializeIslAstInfoWrapperPassPass(Registry); 241 initializeIslScheduleOptimizerPass(Registry); 242 initializePollyCanonicalizePass(Registry); 243 initializePolyhedralInfoPass(Registry); 244 initializeScopDetectionWrapperPassPass(Registry); 245 initializeScopInfoRegionPassPass(Registry); 246 initializeScopInfoWrapperPassPass(Registry); 247 initializeCodegenCleanupPass(Registry); 248 initializeFlattenSchedulePass(Registry); 249 initializeDeLICMPass(Registry); 250 initializeSimplifyPass(Registry); 251 initializeDumpModulePass(Registry); 252 initializePruneUnprofitablePass(Registry); 253 } 254 255 /// Register Polly passes such that they form a polyhedral optimizer. 256 /// 257 /// The individual Polly passes are registered in the pass manager such that 258 /// they form a full polyhedral optimizer. The flow of the optimizer starts with 259 /// a set of preparing transformations that canonicalize the LLVM-IR such that 260 /// the LLVM-IR is easier for us to understand and to optimizes. On the 261 /// canonicalized LLVM-IR we first run the ScopDetection pass, which detects 262 /// static control flow regions. Those regions are then translated by the 263 /// ScopInfo pass into a polyhedral representation. As a next step, a scheduling 264 /// optimizer is run on the polyhedral representation and finally the optimized 265 /// polyhedral representation is code generated back to LLVM-IR. 266 /// 267 /// Besides this core functionality, we optionally schedule passes that provide 268 /// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that 269 /// allow the export/import of the polyhedral representation 270 /// (JSCON[Exporter|Importer]) or that show the cfg after code generation. 271 /// 272 /// For certain parts of the Polly optimizer, several alternatives are provided: 273 /// 274 /// As scheduling optimizer we support the isl scheduling optimizer 275 /// (http://freecode.com/projects/isl). 276 /// It is also possible to run Polly with no optimizer. This mode is mainly 277 /// provided to analyze the run and compile time changes caused by the 278 /// scheduling optimizer. 279 /// 280 /// Polly supports the isl internal code generator. 281 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) { 282 if (DumpBefore) 283 PM.add(polly::createDumpModulePass("-before", true)); 284 for (auto &Filename : DumpBeforeFile) 285 PM.add(polly::createDumpModulePass(Filename, false)); 286 287 PM.add(polly::createScopDetectionWrapperPassPass()); 288 289 if (PollyDetectOnly) 290 return; 291 292 if (PollyViewer) 293 PM.add(polly::createDOTViewerPass()); 294 if (PollyOnlyViewer) 295 PM.add(polly::createDOTOnlyViewerPass()); 296 if (PollyPrinter) 297 PM.add(polly::createDOTPrinterPass()); 298 if (PollyOnlyPrinter) 299 PM.add(polly::createDOTOnlyPrinterPass()); 300 301 PM.add(polly::createScopInfoRegionPassPass()); 302 if (EnablePolyhedralInfo) 303 PM.add(polly::createPolyhedralInfoPass()); 304 305 if (EnableDeLICM) 306 PM.add(polly::createDeLICMPass()); 307 if (EnableSimplify) 308 PM.add(polly::createSimplifyPass()); 309 310 if (ImportJScop) 311 PM.add(polly::createJSONImporterPass()); 312 313 if (DeadCodeElim) 314 PM.add(polly::createDeadCodeElimPass()); 315 316 if (EnablePruneUnprofitable) 317 PM.add(polly::createPruneUnprofitablePass()); 318 319 #ifdef GPU_CODEGEN 320 if (Target == TARGET_HYBRID) 321 PM.add( 322 polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); 323 #endif 324 if (Target == TARGET_CPU || Target == TARGET_HYBRID) 325 switch (Optimizer) { 326 case OPTIMIZER_NONE: 327 break; /* Do nothing */ 328 329 case OPTIMIZER_ISL: 330 PM.add(polly::createIslScheduleOptimizerPass()); 331 break; 332 } 333 334 if (ExportJScop) 335 PM.add(polly::createJSONExporterPass()); 336 337 if (Target == TARGET_CPU || Target == TARGET_HYBRID) 338 switch (CodeGeneration) { 339 case CODEGEN_AST: 340 PM.add(polly::createIslAstInfoWrapperPassPass()); 341 break; 342 case CODEGEN_FULL: 343 PM.add(polly::createCodeGenerationPass()); 344 break; 345 case CODEGEN_NONE: 346 break; 347 } 348 #ifdef GPU_CODEGEN 349 else 350 PM.add( 351 polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); 352 #endif 353 354 // FIXME: This dummy ModulePass keeps some programs from miscompiling, 355 // probably some not correctly preserved analyses. It acts as a barrier to 356 // force all analysis results to be recomputed. 357 PM.add(createBarrierNoopPass()); 358 359 if (DumpAfter) 360 PM.add(polly::createDumpModulePass("-after", true)); 361 for (auto &Filename : DumpAfterFile) 362 PM.add(polly::createDumpModulePass(Filename, false)); 363 364 if (CFGPrinter) 365 PM.add(llvm::createCFGPrinterLegacyPassPass()); 366 } 367 368 static bool shouldEnablePolly() { 369 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer) 370 PollyTrackFailures = true; 371 372 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer || 373 ExportJScop || ImportJScop) 374 PollyEnabled = true; 375 376 return PollyEnabled; 377 } 378 379 static void 380 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder, 381 llvm::legacy::PassManagerBase &PM) { 382 if (!polly::shouldEnablePolly()) 383 return; 384 385 if (PassPosition != POSITION_EARLY) 386 return; 387 388 registerCanonicalicationPasses(PM); 389 polly::registerPollyPasses(PM); 390 } 391 392 static void 393 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder, 394 llvm::legacy::PassManagerBase &PM) { 395 if (!polly::shouldEnablePolly()) 396 return; 397 398 if (PassPosition != POSITION_AFTER_LOOPOPT) 399 return; 400 401 PM.add(polly::createCodePreparationPass()); 402 polly::registerPollyPasses(PM); 403 PM.add(createCodegenCleanupPass()); 404 } 405 406 static void 407 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder, 408 llvm::legacy::PassManagerBase &PM) { 409 if (!polly::shouldEnablePolly()) 410 return; 411 412 if (PassPosition != POSITION_BEFORE_VECTORIZER) 413 return; 414 415 PM.add(polly::createCodePreparationPass()); 416 polly::registerPollyPasses(PM); 417 PM.add(createCodegenCleanupPass()); 418 } 419 420 /// Register Polly to be available as an optimizer 421 /// 422 /// 423 /// We can currently run Polly at three different points int the pass manager. 424 /// a) very early, b) after the canonicalizing loop transformations and c) right 425 /// before the vectorizer. 426 /// 427 /// The default is currently a), to register Polly such that it runs as early as 428 /// possible. This has several implications: 429 /// 430 /// 1) We need to schedule more canonicalization passes 431 /// 432 /// As nothing is run before Polly, it is necessary to run a set of preparing 433 /// transformations before Polly to canonicalize the LLVM-IR and to allow 434 /// Polly to detect and understand the code. 435 /// 436 /// 2) LICM and LoopIdiom pass have not yet been run 437 /// 438 /// Loop invariant code motion as well as the loop idiom recognition pass make 439 /// it more difficult for Polly to transform code. LICM may introduce 440 /// additional data dependences that are hard to eliminate and the loop idiom 441 /// recognition pass may introduce calls to memset that we currently do not 442 /// understand. By running Polly early enough (meaning before these passes) we 443 /// avoid difficulties that may be introduced by these passes. 444 /// 445 /// 3) We get the full -O3 optimization sequence after Polly 446 /// 447 /// The LLVM-IR that is generated by Polly has been optimized on a high level, 448 /// but it may be rather inefficient on the lower/scalar level. By scheduling 449 /// Polly before all other passes, we have the full sequence of -O3 450 /// optimizations behind us, such that inefficiencies on the low level can 451 /// be optimized away. 452 /// 453 /// We are currently evaluating the benefit or running Polly at position b) or 454 /// c). b) is likely to early as it interacts with the inliner. c) is nice 455 /// as everything is fully inlined and canonicalized, but we need to be able 456 /// to handle LICMed code to make it useful. 457 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly( 458 llvm::PassManagerBuilder::EP_ModuleOptimizerEarly, 459 registerPollyEarlyAsPossiblePasses); 460 461 static llvm::RegisterStandardPasses 462 RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd, 463 registerPollyLoopOptimizerEndPasses); 464 465 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate( 466 llvm::PassManagerBuilder::EP_VectorizerStart, 467 registerPollyScalarOptimizerLatePasses); 468 } // namespace polly 469