1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file composes the individual LLVM-IR passes provided by Polly to a 11 // functional polyhedral optimizer. The polyhedral optimizer is automatically 12 // made available to LLVM based compilers by loading the Polly shared library 13 // into such a compiler. 14 // 15 // The Polly optimizer is made available by executing a static constructor that 16 // registers the individual Polly passes in the LLVM pass manager builder. The 17 // passes are registered such that the default behaviour of the compiler is not 18 // changed, but that the flag '-polly' provided at optimization level '-O3' 19 // enables additional polyhedral optimizations. 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/RegisterPasses.h" 23 #include "polly/Canonicalization.h" 24 #include "polly/CodeGen/CodeGeneration.h" 25 #include "polly/CodeGen/CodegenCleanup.h" 26 #include "polly/DependenceInfo.h" 27 #include "polly/FlattenSchedule.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/Options.h" 30 #include "polly/PolyhedralInfo.h" 31 #include "polly/ScopDetection.h" 32 #include "polly/ScopInfo.h" 33 #include "llvm/Analysis/CFGPrinter.h" 34 #include "llvm/IR/LegacyPassManager.h" 35 #include "llvm/Transforms/IPO.h" 36 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 37 #include "llvm/Transforms/Scalar.h" 38 #include "llvm/Transforms/Vectorize.h" 39 40 using namespace llvm; 41 using namespace polly; 42 43 cl::OptionCategory PollyCategory("Polly Options", 44 "Configure the polly loop optimizer"); 45 46 static cl::opt<bool> 47 PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at 
-O3)"), 48 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 49 50 static cl::opt<bool> PollyDetectOnly( 51 "polly-only-scop-detection", 52 cl::desc("Only run scop detection, but no other optimizations"), 53 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 54 55 enum PassPositionChoice { 56 POSITION_EARLY, 57 POSITION_AFTER_LOOPOPT, 58 POSITION_BEFORE_VECTORIZER 59 }; 60 61 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL }; 62 63 static cl::opt<PassPositionChoice> PassPosition( 64 "polly-position", cl::desc("Where to run polly in the pass pipeline"), 65 cl::values( 66 clEnumValN(POSITION_EARLY, "early", "Before everything"), 67 clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt", 68 "After the loop optimizer (but within the inline cycle)"), 69 clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer", 70 "Right before the vectorizer")), 71 cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore, 72 cl::cat(PollyCategory)); 73 74 static cl::opt<OptimizerChoice> 75 Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"), 76 cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"), 77 clEnumValN(OPTIMIZER_ISL, "isl", 78 "The isl scheduling optimizer")), 79 cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore, 80 cl::cat(PollyCategory)); 81 82 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE }; 83 static cl::opt<CodeGenChoice> CodeGeneration( 84 "polly-code-generation", cl::desc("How much code-generation to perform"), 85 cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"), 86 clEnumValN(CODEGEN_AST, "ast", "Only AST generation"), 87 clEnumValN(CODEGEN_NONE, "none", "No code generation")), 88 cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); 89 90 enum TargetChoice { TARGET_CPU, TARGET_GPU }; 91 static cl::opt<TargetChoice> 92 Target("polly-target", cl::desc("The hardware to target"), 93 cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") 94 #ifdef GPU_CODEGEN 95 , 
96 clEnumValN(TARGET_GPU, "gpu", "generate GPU code") 97 #endif 98 ), 99 cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); 100 101 VectorizerChoice polly::PollyVectorizerChoice; 102 static cl::opt<polly::VectorizerChoice, true> Vectorizer( 103 "polly-vectorizer", cl::desc("Select the vectorization strategy"), 104 cl::values( 105 clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"), 106 clEnumValN(polly::VECTORIZER_POLLY, "polly", 107 "Polly internal vectorizer"), 108 clEnumValN( 109 polly::VECTORIZER_STRIPMINE, "stripmine", 110 "Strip-mine outer loops for the loop-vectorizer to trigger")), 111 cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE), 112 cl::ZeroOrMore, cl::cat(PollyCategory)); 113 114 static cl::opt<bool> ImportJScop( 115 "polly-import", 116 cl::desc("Export the polyhedral description of the detected Scops"), 117 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 118 119 static cl::opt<bool> ExportJScop( 120 "polly-export", 121 cl::desc("Export the polyhedral description of the detected Scops"), 122 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 123 124 static cl::opt<bool> DeadCodeElim("polly-run-dce", 125 cl::desc("Run the dead code elimination"), 126 cl::Hidden, cl::init(false), cl::ZeroOrMore, 127 cl::cat(PollyCategory)); 128 129 static cl::opt<bool> PollyViewer( 130 "polly-show", 131 cl::desc("Highlight the code regions that will be optimized in a " 132 "(CFG BBs and LLVM-IR instructions)"), 133 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 134 135 static cl::opt<bool> PollyOnlyViewer( 136 "polly-show-only", 137 cl::desc("Highlight the code regions that will be optimized in " 138 "a (CFG only BBs)"), 139 cl::init(false), cl::cat(PollyCategory)); 140 141 static cl::opt<bool> 142 PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"), 143 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"), 144 cl::init(false), 
cl::cat(PollyCategory)); 145 146 static cl::opt<bool> PollyOnlyPrinter( 147 "polly-dot-only", 148 cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden, 149 cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"), 150 cl::init(false), cl::cat(PollyCategory)); 151 152 static cl::opt<bool> 153 CFGPrinter("polly-view-cfg", 154 cl::desc("Show the Polly CFG right after code generation"), 155 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 156 157 static cl::opt<bool> 158 EnablePolyhedralInfo("polly-enable-polyhedralinfo", 159 cl::desc("Enable polyhedral interface of Polly"), 160 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 161 162 namespace polly { 163 void initializePollyPasses(PassRegistry &Registry) { 164 initializeCodeGenerationPass(Registry); 165 166 #ifdef GPU_CODEGEN 167 initializePPCGCodeGenerationPass(Registry); 168 #endif 169 initializeCodePreparationPass(Registry); 170 initializeDeadCodeElimPass(Registry); 171 initializeDependenceInfoPass(Registry); 172 initializeDependenceInfoWrapperPassPass(Registry); 173 initializeJSONExporterPass(Registry); 174 initializeJSONImporterPass(Registry); 175 initializeIslAstInfoPass(Registry); 176 initializeIslScheduleOptimizerPass(Registry); 177 initializePollyCanonicalizePass(Registry); 178 initializePolyhedralInfoPass(Registry); 179 initializeScopDetectionPass(Registry); 180 initializeScopInfoRegionPassPass(Registry); 181 initializeScopInfoWrapperPassPass(Registry); 182 initializeCodegenCleanupPass(Registry); 183 initializeFlattenSchedulePass(Registry); 184 } 185 186 /// Register Polly passes such that they form a polyhedral optimizer. 187 /// 188 /// The individual Polly passes are registered in the pass manager such that 189 /// they form a full polyhedral optimizer. The flow of the optimizer starts with 190 /// a set of preparing transformations that canonicalize the LLVM-IR such that 191 /// the LLVM-IR is easier for us to understand and to optimizes. 
/// On the canonicalized LLVM-IR we first run the ScopDetection pass, which
/// detects static control flow regions. Those regions are then translated by
/// the ScopInfo pass into a polyhedral representation. As a next step, a
/// scheduling optimizer is run on the polyhedral representation and finally the
/// optimized polyhedral representation is code generated back to LLVM-IR.
///
/// Besides this core functionality, we optionally schedule passes that provide
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
/// allow the export/import of the polyhedral representation
/// (JSON[Exporter|Importer]) or that show the cfg after code generation.
///
/// For certain parts of the Polly optimizer, several alternatives are provided:
///
/// As scheduling optimizer we support the isl scheduling optimizer
/// (http://freecode.com/projects/isl).
/// It is also possible to run Polly with no optimizer. This mode is mainly
/// provided to analyze the run and compile time changes caused by the
/// scheduling optimizer.
///
/// Polly supports the isl internal code generator.
212 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) { 213 PM.add(polly::createScopDetectionPass()); 214 215 if (PollyDetectOnly) 216 return; 217 218 if (PollyViewer) 219 PM.add(polly::createDOTViewerPass()); 220 if (PollyOnlyViewer) 221 PM.add(polly::createDOTOnlyViewerPass()); 222 if (PollyPrinter) 223 PM.add(polly::createDOTPrinterPass()); 224 if (PollyOnlyPrinter) 225 PM.add(polly::createDOTOnlyPrinterPass()); 226 227 PM.add(polly::createScopInfoRegionPassPass()); 228 if (EnablePolyhedralInfo) 229 PM.add(polly::createPolyhedralInfoPass()); 230 231 if (ImportJScop) 232 PM.add(polly::createJSONImporterPass()); 233 234 if (DeadCodeElim) 235 PM.add(polly::createDeadCodeElimPass()); 236 237 if (Target == TARGET_GPU) { 238 // GPU generation provides its own scheduling optimization strategy. 239 } else { 240 switch (Optimizer) { 241 case OPTIMIZER_NONE: 242 break; /* Do nothing */ 243 244 case OPTIMIZER_ISL: 245 PM.add(polly::createIslScheduleOptimizerPass()); 246 break; 247 } 248 } 249 250 if (ExportJScop) 251 PM.add(polly::createJSONExporterPass()); 252 253 if (Target == TARGET_GPU) { 254 #ifdef GPU_CODEGEN 255 PM.add(polly::createPPCGCodeGenerationPass()); 256 #endif 257 } else { 258 switch (CodeGeneration) { 259 case CODEGEN_AST: 260 PM.add(polly::createIslAstInfoPass()); 261 break; 262 case CODEGEN_FULL: 263 PM.add(polly::createCodeGenerationPass()); 264 break; 265 case CODEGEN_NONE: 266 break; 267 } 268 } 269 270 // FIXME: This dummy ModulePass keeps some programs from miscompiling, 271 // probably some not correctly preserved analyses. It acts as a barrier to 272 // force all analysis results to be recomputed. 273 PM.add(createBarrierNoopPass()); 274 275 if (CFGPrinter) 276 PM.add(llvm::createCFGPrinterLegacyPassPass()); 277 278 if (Target == TARGET_GPU) { 279 // Invariant load hoisting not yet supported by GPU code generation. 
280 PollyInvariantLoadHoisting = false; 281 } 282 } 283 284 static bool shouldEnablePolly() { 285 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer) 286 PollyTrackFailures = true; 287 288 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer || 289 ExportJScop || ImportJScop) 290 PollyEnabled = true; 291 292 return PollyEnabled; 293 } 294 295 static void 296 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder, 297 llvm::legacy::PassManagerBase &PM) { 298 if (!polly::shouldEnablePolly()) 299 return; 300 301 if (PassPosition != POSITION_EARLY) 302 return; 303 304 registerCanonicalicationPasses(PM); 305 polly::registerPollyPasses(PM); 306 } 307 308 static void 309 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder, 310 llvm::legacy::PassManagerBase &PM) { 311 if (!polly::shouldEnablePolly()) 312 return; 313 314 if (PassPosition != POSITION_AFTER_LOOPOPT) 315 return; 316 317 PM.add(polly::createCodePreparationPass()); 318 polly::registerPollyPasses(PM); 319 PM.add(createCodegenCleanupPass()); 320 } 321 322 static void 323 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder, 324 llvm::legacy::PassManagerBase &PM) { 325 if (!polly::shouldEnablePolly()) 326 return; 327 328 if (PassPosition != POSITION_BEFORE_VECTORIZER) 329 return; 330 331 PM.add(polly::createCodePreparationPass()); 332 polly::registerPollyPasses(PM); 333 PM.add(createCodegenCleanupPass()); 334 } 335 336 /// Register Polly to be available as an optimizer 337 /// 338 /// 339 /// We can currently run Polly at three different points int the pass manager. 340 /// a) very early, b) after the canonicalizing loop transformations and c) right 341 /// before the vectorizer. 342 /// 343 /// The default is currently a), to register Polly such that it runs as early as 344 /// possible. 
/// This has several implications:
///
/// 1) We need to schedule more canonicalization passes
///
/// As nothing is run before Polly, it is necessary to run a set of preparing
/// transformations before Polly to canonicalize the LLVM-IR and to allow
/// Polly to detect and understand the code.
///
/// 2) LICM and LoopIdiom pass have not yet been run
///
/// Loop invariant code motion as well as the loop idiom recognition pass make
/// it more difficult for Polly to transform code. LICM may introduce
/// additional data dependences that are hard to eliminate and the loop idiom
/// recognition pass may introduce calls to memset that we currently do not
/// understand. By running Polly early enough (meaning before these passes) we
/// avoid difficulties that may be introduced by these passes.
///
/// 3) We get the full -O3 optimization sequence after Polly
///
/// The LLVM-IR that is generated by Polly has been optimized on a high level,
/// but it may be rather inefficient on the lower/scalar level. By scheduling
/// Polly before all other passes, we have the full sequence of -O3
/// optimizations behind us, such that inefficiencies on the low level can
/// be optimized away.
///
/// We are currently evaluating the benefit of running Polly at position b) or
/// c). b) is likely too early as it interacts with the inliner. c) is nice
/// as everything is fully inlined and canonicalized, but we need to be able
/// to handle LICMed code to make it useful.
373 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly( 374 llvm::PassManagerBuilder::EP_ModuleOptimizerEarly, 375 registerPollyEarlyAsPossiblePasses); 376 377 static llvm::RegisterStandardPasses 378 RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd, 379 registerPollyLoopOptimizerEndPasses); 380 381 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate( 382 llvm::PassManagerBuilder::EP_VectorizerStart, 383 registerPollyScalarOptimizerLatePasses); 384 } // namespace polly 385