1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file composes the individual LLVM-IR passes provided by Polly to a 11 // functional polyhedral optimizer. The polyhedral optimizer is automatically 12 // made available to LLVM based compilers by loading the Polly shared library 13 // into such a compiler. 14 // 15 // The Polly optimizer is made available by executing a static constructor that 16 // registers the individual Polly passes in the LLVM pass manager builder. The 17 // passes are registered such that the default behaviour of the compiler is not 18 // changed, but that the flag '-polly' provided at optimization level '-O3' 19 // enables additional polyhedral optimizations. 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/RegisterPasses.h" 23 #include "polly/Canonicalization.h" 24 #include "polly/CodeGen/CodeGeneration.h" 25 #include "polly/CodeGen/CodegenCleanup.h" 26 #include "polly/CodeGen/PPCGCodeGeneration.h" 27 #include "polly/DeLICM.h" 28 #include "polly/DependenceInfo.h" 29 #include "polly/FlattenSchedule.h" 30 #include "polly/LinkAllPasses.h" 31 #include "polly/Options.h" 32 #include "polly/PolyhedralInfo.h" 33 #include "polly/ScopDetection.h" 34 #include "polly/ScopInfo.h" 35 #include "polly/Simplify.h" 36 #include "polly/Support/DumpModulePass.h" 37 #include "llvm/Analysis/CFGPrinter.h" 38 #include "llvm/IR/LegacyPassManager.h" 39 #include "llvm/Transforms/IPO.h" 40 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 41 #include "llvm/Transforms/Scalar.h" 42 #include "llvm/Transforms/Vectorize.h" 43 44 using namespace llvm; 45 using namespace polly; 46 47 cl::OptionCategory PollyCategory("Polly 
Options", 48 "Configure the polly loop optimizer"); 49 50 static cl::opt<bool> 51 PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"), 52 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 53 54 static cl::opt<bool> PollyDetectOnly( 55 "polly-only-scop-detection", 56 cl::desc("Only run scop detection, but no other optimizations"), 57 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 58 59 enum PassPositionChoice { 60 POSITION_EARLY, 61 POSITION_AFTER_LOOPOPT, 62 POSITION_BEFORE_VECTORIZER 63 }; 64 65 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL }; 66 67 static cl::opt<PassPositionChoice> PassPosition( 68 "polly-position", cl::desc("Where to run polly in the pass pipeline"), 69 cl::values( 70 clEnumValN(POSITION_EARLY, "early", "Before everything"), 71 clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt", 72 "After the loop optimizer (but within the inline cycle)"), 73 clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer", 74 "Right before the vectorizer")), 75 cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore, 76 cl::cat(PollyCategory)); 77 78 static cl::opt<OptimizerChoice> 79 Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"), 80 cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"), 81 clEnumValN(OPTIMIZER_ISL, "isl", 82 "The isl scheduling optimizer")), 83 cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore, 84 cl::cat(PollyCategory)); 85 86 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE }; 87 static cl::opt<CodeGenChoice> CodeGeneration( 88 "polly-code-generation", cl::desc("How much code-generation to perform"), 89 cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"), 90 clEnumValN(CODEGEN_AST, "ast", "Only AST generation"), 91 clEnumValN(CODEGEN_NONE, "none", "No code generation")), 92 cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); 93 94 enum TargetChoice { TARGET_CPU, TARGET_GPU }; 95 static cl::opt<TargetChoice> 96 
Target("polly-target", cl::desc("The hardware to target"), 97 cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") 98 #ifdef GPU_CODEGEN 99 , 100 clEnumValN(TARGET_GPU, "gpu", "generate GPU code") 101 #endif 102 ), 103 cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); 104 105 #ifdef GPU_CODEGEN 106 static cl::opt<GPURuntime> GPURuntimeChoice( 107 "polly-gpu-runtime", cl::desc("The GPU Runtime API to target"), 108 cl::values(clEnumValN(GPURuntime::CUDA, "libcudart", 109 "use the CUDA Runtime API"), 110 clEnumValN(GPURuntime::OpenCL, "libopencl", 111 "use the OpenCL Runtime API")), 112 cl::init(GPURuntime::CUDA), cl::ZeroOrMore, cl::cat(PollyCategory)); 113 114 static cl::opt<GPUArch> 115 GPUArchChoice("polly-gpu-arch", cl::desc("The GPU Architecture to target"), 116 cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64", 117 "target NVIDIA 64-bit architecture")), 118 cl::init(GPUArch::NVPTX64), cl::ZeroOrMore, 119 cl::cat(PollyCategory)); 120 #endif 121 122 VectorizerChoice polly::PollyVectorizerChoice; 123 static cl::opt<polly::VectorizerChoice, true> Vectorizer( 124 "polly-vectorizer", cl::desc("Select the vectorization strategy"), 125 cl::values( 126 clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"), 127 clEnumValN(polly::VECTORIZER_POLLY, "polly", 128 "Polly internal vectorizer"), 129 clEnumValN( 130 polly::VECTORIZER_STRIPMINE, "stripmine", 131 "Strip-mine outer loops for the loop-vectorizer to trigger")), 132 cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE), 133 cl::ZeroOrMore, cl::cat(PollyCategory)); 134 135 static cl::opt<bool> ImportJScop( 136 "polly-import", 137 cl::desc("Import the polyhedral description of the detected Scops"), 138 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 139 140 static cl::opt<bool> ExportJScop( 141 "polly-export", 142 cl::desc("Export the polyhedral description of the detected Scops"), 143 cl::Hidden, cl::init(false), cl::ZeroOrMore, 
cl::cat(PollyCategory)); 144 145 static cl::opt<bool> DeadCodeElim("polly-run-dce", 146 cl::desc("Run the dead code elimination"), 147 cl::Hidden, cl::init(false), cl::ZeroOrMore, 148 cl::cat(PollyCategory)); 149 150 static cl::opt<bool> PollyViewer( 151 "polly-show", 152 cl::desc("Highlight the code regions that will be optimized in a " 153 "(CFG BBs and LLVM-IR instructions)"), 154 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 155 156 static cl::opt<bool> PollyOnlyViewer( 157 "polly-show-only", 158 cl::desc("Highlight the code regions that will be optimized in " 159 "a (CFG only BBs)"), 160 cl::init(false), cl::cat(PollyCategory)); 161 162 static cl::opt<bool> 163 PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"), 164 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"), 165 cl::init(false), cl::cat(PollyCategory)); 166 167 static cl::opt<bool> PollyOnlyPrinter( 168 "polly-dot-only", 169 cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden, 170 cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"), 171 cl::init(false), cl::cat(PollyCategory)); 172 173 static cl::opt<bool> 174 CFGPrinter("polly-view-cfg", 175 cl::desc("Show the Polly CFG right after code generation"), 176 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 177 178 static cl::opt<bool> 179 EnablePolyhedralInfo("polly-enable-polyhedralinfo", 180 cl::desc("Enable polyhedral interface of Polly"), 181 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 182 183 static cl::opt<bool> 184 DumpBefore("polly-dump-before", 185 cl::desc("Dump module before Polly transformations into a file " 186 "suffixed with \"-before\""), 187 cl::init(false), cl::cat(PollyCategory)); 188 189 static cl::list<std::string> DumpBeforeFile( 190 "polly-dump-before-file", 191 cl::desc("Dump module before Polly transformations to the given file"), 192 cl::cat(PollyCategory)); 193 194 static cl::opt<bool> 195 DumpAfter("polly-dump-after", 196 
cl::desc("Dump module after Polly transformations into a file " 197 "suffixed with \"-after\""), 198 cl::init(false), cl::cat(PollyCategory)); 199 200 static cl::list<std::string> DumpAfterFile( 201 "polly-dump-after-file", 202 cl::desc("Dump module after Polly transformations to the given file"), 203 cl::ZeroOrMore, cl::cat(PollyCategory)); 204 205 static cl::opt<bool> 206 EnableDeLICM("polly-enable-delicm", 207 cl::desc("Eliminate scalar loop carried dependences"), 208 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 209 210 static cl::opt<bool> 211 EnableSimplify("polly-enable-simplify", 212 cl::desc("Simplify SCoP after optimizations"), 213 cl::init(false), cl::cat(PollyCategory)); 214 215 static cl::opt<bool> EnablePruneUnprofitable( 216 "polly-enable-prune-unprofitable", 217 cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden, 218 cl::init(true), cl::cat(PollyCategory)); 219 220 namespace polly { 221 void initializePollyPasses(PassRegistry &Registry) { 222 initializeCodeGenerationPass(Registry); 223 224 #ifdef GPU_CODEGEN 225 initializePPCGCodeGenerationPass(Registry); 226 #endif 227 initializeCodePreparationPass(Registry); 228 initializeDeadCodeElimPass(Registry); 229 initializeDependenceInfoPass(Registry); 230 initializeDependenceInfoWrapperPassPass(Registry); 231 initializeJSONExporterPass(Registry); 232 initializeJSONImporterPass(Registry); 233 initializeIslAstInfoWrapperPassPass(Registry); 234 initializeIslScheduleOptimizerPass(Registry); 235 initializePollyCanonicalizePass(Registry); 236 initializePolyhedralInfoPass(Registry); 237 initializeScopDetectionWrapperPassPass(Registry); 238 initializeScopInfoRegionPassPass(Registry); 239 initializeScopInfoWrapperPassPass(Registry); 240 initializeCodegenCleanupPass(Registry); 241 initializeFlattenSchedulePass(Registry); 242 initializeDeLICMPass(Registry); 243 initializeSimplifyPass(Registry); 244 initializeDumpModulePass(Registry); 245 initializePruneUnprofitablePass(Registry); 246 } 

/// Register Polly passes such that they form a polyhedral optimizer.
///
/// The individual Polly passes are registered in the pass manager such that
/// they form a full polyhedral optimizer. The flow of the optimizer starts with
/// a set of preparing transformations that canonicalize the LLVM-IR such that
/// the LLVM-IR is easier for us to understand and to optimize. On the
/// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
/// static control flow regions. Those regions are then translated by the
/// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
/// optimizer is run on the polyhedral representation and finally the optimized
/// polyhedral representation is code generated back to LLVM-IR.
///
/// Besides this core functionality, we optionally schedule passes that provide
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
/// allow the export/import of the polyhedral representation
/// (JSON[Exporter|Importer]) or that show the cfg after code generation.
///
/// For certain parts of the Polly optimizer, several alternatives are provided:
///
/// As scheduling optimizer we support the isl scheduling optimizer
/// (http://freecode.com/projects/isl).
/// It is also possible to run Polly with no optimizer. This mode is mainly
/// provided to analyze the run and compile time changes caused by the
/// scheduling optimizer.
///
/// Polly supports the isl internal code generator.
274 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) { 275 if (DumpBefore) 276 PM.add(polly::createDumpModulePass("-before", true)); 277 for (auto &Filename : DumpBeforeFile) 278 PM.add(polly::createDumpModulePass(Filename, false)); 279 280 PM.add(polly::createScopDetectionWrapperPassPass()); 281 282 if (PollyDetectOnly) 283 return; 284 285 if (PollyViewer) 286 PM.add(polly::createDOTViewerPass()); 287 if (PollyOnlyViewer) 288 PM.add(polly::createDOTOnlyViewerPass()); 289 if (PollyPrinter) 290 PM.add(polly::createDOTPrinterPass()); 291 if (PollyOnlyPrinter) 292 PM.add(polly::createDOTOnlyPrinterPass()); 293 294 PM.add(polly::createScopInfoRegionPassPass()); 295 if (EnablePolyhedralInfo) 296 PM.add(polly::createPolyhedralInfoPass()); 297 298 if (EnableDeLICM) 299 PM.add(polly::createDeLICMPass()); 300 if (EnableSimplify) 301 PM.add(polly::createSimplifyPass()); 302 303 if (ImportJScop) 304 PM.add(polly::createJSONImporterPass()); 305 306 if (DeadCodeElim) 307 PM.add(polly::createDeadCodeElimPass()); 308 309 if (EnablePruneUnprofitable) 310 PM.add(polly::createPruneUnprofitablePass()); 311 312 if (Target == TARGET_GPU) { 313 // GPU generation provides its own scheduling optimization strategy. 
314 } else { 315 switch (Optimizer) { 316 case OPTIMIZER_NONE: 317 break; /* Do nothing */ 318 319 case OPTIMIZER_ISL: 320 PM.add(polly::createIslScheduleOptimizerPass()); 321 break; 322 } 323 } 324 325 if (ExportJScop) 326 PM.add(polly::createJSONExporterPass()); 327 328 if (Target == TARGET_GPU) { 329 #ifdef GPU_CODEGEN 330 PM.add( 331 polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); 332 #endif 333 } else { 334 switch (CodeGeneration) { 335 case CODEGEN_AST: 336 PM.add(polly::createIslAstInfoWrapperPassPass()); 337 break; 338 case CODEGEN_FULL: 339 PM.add(polly::createCodeGenerationPass()); 340 break; 341 case CODEGEN_NONE: 342 break; 343 } 344 } 345 346 // FIXME: This dummy ModulePass keeps some programs from miscompiling, 347 // probably some not correctly preserved analyses. It acts as a barrier to 348 // force all analysis results to be recomputed. 349 PM.add(createBarrierNoopPass()); 350 351 if (DumpAfter) 352 PM.add(polly::createDumpModulePass("-after", true)); 353 for (auto &Filename : DumpAfterFile) 354 PM.add(polly::createDumpModulePass(Filename, false)); 355 356 if (CFGPrinter) 357 PM.add(llvm::createCFGPrinterLegacyPassPass()); 358 } 359 360 static bool shouldEnablePolly() { 361 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer) 362 PollyTrackFailures = true; 363 364 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer || 365 ExportJScop || ImportJScop) 366 PollyEnabled = true; 367 368 return PollyEnabled; 369 } 370 371 static void 372 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder, 373 llvm::legacy::PassManagerBase &PM) { 374 if (!polly::shouldEnablePolly()) 375 return; 376 377 if (PassPosition != POSITION_EARLY) 378 return; 379 380 registerCanonicalicationPasses(PM); 381 polly::registerPollyPasses(PM); 382 } 383 384 static void 385 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder, 386 llvm::legacy::PassManagerBase &PM) { 387 if 
(!polly::shouldEnablePolly()) 388 return; 389 390 if (PassPosition != POSITION_AFTER_LOOPOPT) 391 return; 392 393 PM.add(polly::createCodePreparationPass()); 394 polly::registerPollyPasses(PM); 395 PM.add(createCodegenCleanupPass()); 396 } 397 398 static void 399 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder, 400 llvm::legacy::PassManagerBase &PM) { 401 if (!polly::shouldEnablePolly()) 402 return; 403 404 if (PassPosition != POSITION_BEFORE_VECTORIZER) 405 return; 406 407 PM.add(polly::createCodePreparationPass()); 408 polly::registerPollyPasses(PM); 409 PM.add(createCodegenCleanupPass()); 410 } 411 412 /// Register Polly to be available as an optimizer 413 /// 414 /// 415 /// We can currently run Polly at three different points int the pass manager. 416 /// a) very early, b) after the canonicalizing loop transformations and c) right 417 /// before the vectorizer. 418 /// 419 /// The default is currently a), to register Polly such that it runs as early as 420 /// possible. This has several implications: 421 /// 422 /// 1) We need to schedule more canonicalization passes 423 /// 424 /// As nothing is run before Polly, it is necessary to run a set of preparing 425 /// transformations before Polly to canonicalize the LLVM-IR and to allow 426 /// Polly to detect and understand the code. 427 /// 428 /// 2) LICM and LoopIdiom pass have not yet been run 429 /// 430 /// Loop invariant code motion as well as the loop idiom recognition pass make 431 /// it more difficult for Polly to transform code. LICM may introduce 432 /// additional data dependences that are hard to eliminate and the loop idiom 433 /// recognition pass may introduce calls to memset that we currently do not 434 /// understand. By running Polly early enough (meaning before these passes) we 435 /// avoid difficulties that may be introduced by these passes. 
436 /// 437 /// 3) We get the full -O3 optimization sequence after Polly 438 /// 439 /// The LLVM-IR that is generated by Polly has been optimized on a high level, 440 /// but it may be rather inefficient on the lower/scalar level. By scheduling 441 /// Polly before all other passes, we have the full sequence of -O3 442 /// optimizations behind us, such that inefficiencies on the low level can 443 /// be optimized away. 444 /// 445 /// We are currently evaluating the benefit or running Polly at position b) or 446 /// c). b) is likely to early as it interacts with the inliner. c) is nice 447 /// as everything is fully inlined and canonicalized, but we need to be able 448 /// to handle LICMed code to make it useful. 449 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly( 450 llvm::PassManagerBuilder::EP_ModuleOptimizerEarly, 451 registerPollyEarlyAsPossiblePasses); 452 453 static llvm::RegisterStandardPasses 454 RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd, 455 registerPollyLoopOptimizerEndPasses); 456 457 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate( 458 llvm::PassManagerBuilder::EP_VectorizerStart, 459 registerPollyScalarOptimizerLatePasses); 460 } // namespace polly 461