1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes --===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file composes the individual LLVM-IR passes provided by Polly to a 11 // functional polyhedral optimizer. The polyhedral optimizer is automatically 12 // made available to LLVM based compilers by loading the Polly shared library 13 // into such a compiler. 14 // 15 // The Polly optimizer is made available by executing a static constructor that 16 // registers the individual Polly passes in the LLVM pass manager builder. The 17 // passes are registered such that the default behaviour of the compiler is not 18 // changed, but that the flag '-polly' provided at optimization level '-O3' 19 // enables additional polyhedral optimizations. 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/RegisterPasses.h" 23 #include "polly/Canonicalization.h" 24 #include "polly/CodeGen/CodeGeneration.h" 25 #include "polly/CodeGen/CodegenCleanup.h" 26 #include "polly/DeLICM.h" 27 #include "polly/DependenceInfo.h" 28 #include "polly/FlattenSchedule.h" 29 #include "polly/LinkAllPasses.h" 30 #include "polly/Options.h" 31 #include "polly/PolyhedralInfo.h" 32 #include "polly/ScopDetection.h" 33 #include "polly/ScopInfo.h" 34 #include "llvm/Analysis/CFGPrinter.h" 35 #include "llvm/IR/LegacyPassManager.h" 36 #include "llvm/Transforms/IPO.h" 37 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 38 #include "llvm/Transforms/Scalar.h" 39 #include "llvm/Transforms/Vectorize.h" 40 41 using namespace llvm; 42 using namespace polly; 43 44 cl::OptionCategory PollyCategory("Polly Options", 45 "Configure the polly loop optimizer"); 46 47 static cl::opt<bool> 48 PollyEnabled("polly", cl::desc("Enable the 
polly optimizer (only at -O3)"), 49 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 50 51 static cl::opt<bool> PollyDetectOnly( 52 "polly-only-scop-detection", 53 cl::desc("Only run scop detection, but no other optimizations"), 54 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 55 56 enum PassPositionChoice { 57 POSITION_EARLY, 58 POSITION_AFTER_LOOPOPT, 59 POSITION_BEFORE_VECTORIZER 60 }; 61 62 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL }; 63 64 static cl::opt<PassPositionChoice> PassPosition( 65 "polly-position", cl::desc("Where to run polly in the pass pipeline"), 66 cl::values( 67 clEnumValN(POSITION_EARLY, "early", "Before everything"), 68 clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt", 69 "After the loop optimizer (but within the inline cycle)"), 70 clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer", 71 "Right before the vectorizer")), 72 cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore, 73 cl::cat(PollyCategory)); 74 75 static cl::opt<OptimizerChoice> 76 Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"), 77 cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"), 78 clEnumValN(OPTIMIZER_ISL, "isl", 79 "The isl scheduling optimizer")), 80 cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore, 81 cl::cat(PollyCategory)); 82 83 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE }; 84 static cl::opt<CodeGenChoice> CodeGeneration( 85 "polly-code-generation", cl::desc("How much code-generation to perform"), 86 cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"), 87 clEnumValN(CODEGEN_AST, "ast", "Only AST generation"), 88 clEnumValN(CODEGEN_NONE, "none", "No code generation")), 89 cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); 90 91 enum TargetChoice { TARGET_CPU, TARGET_GPU }; 92 static cl::opt<TargetChoice> 93 Target("polly-target", cl::desc("The hardware to target"), 94 cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") 95 
#ifdef GPU_CODEGEN 96 , 97 clEnumValN(TARGET_GPU, "gpu", "generate GPU code") 98 #endif 99 ), 100 cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); 101 102 VectorizerChoice polly::PollyVectorizerChoice; 103 static cl::opt<polly::VectorizerChoice, true> Vectorizer( 104 "polly-vectorizer", cl::desc("Select the vectorization strategy"), 105 cl::values( 106 clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"), 107 clEnumValN(polly::VECTORIZER_POLLY, "polly", 108 "Polly internal vectorizer"), 109 clEnumValN( 110 polly::VECTORIZER_STRIPMINE, "stripmine", 111 "Strip-mine outer loops for the loop-vectorizer to trigger")), 112 cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE), 113 cl::ZeroOrMore, cl::cat(PollyCategory)); 114 115 static cl::opt<bool> ImportJScop( 116 "polly-import", 117 cl::desc("Export the polyhedral description of the detected Scops"), 118 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 119 120 static cl::opt<bool> ExportJScop( 121 "polly-export", 122 cl::desc("Export the polyhedral description of the detected Scops"), 123 cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 124 125 static cl::opt<bool> DeadCodeElim("polly-run-dce", 126 cl::desc("Run the dead code elimination"), 127 cl::Hidden, cl::init(false), cl::ZeroOrMore, 128 cl::cat(PollyCategory)); 129 130 static cl::opt<bool> PollyViewer( 131 "polly-show", 132 cl::desc("Highlight the code regions that will be optimized in a " 133 "(CFG BBs and LLVM-IR instructions)"), 134 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 135 136 static cl::opt<bool> PollyOnlyViewer( 137 "polly-show-only", 138 cl::desc("Highlight the code regions that will be optimized in " 139 "a (CFG only BBs)"), 140 cl::init(false), cl::cat(PollyCategory)); 141 142 static cl::opt<bool> 143 PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"), 144 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"), 145 
cl::init(false), cl::cat(PollyCategory)); 146 147 static cl::opt<bool> PollyOnlyPrinter( 148 "polly-dot-only", 149 cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden, 150 cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"), 151 cl::init(false), cl::cat(PollyCategory)); 152 153 static cl::opt<bool> 154 CFGPrinter("polly-view-cfg", 155 cl::desc("Show the Polly CFG right after code generation"), 156 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 157 158 static cl::opt<bool> 159 EnablePolyhedralInfo("polly-enable-polyhedralinfo", 160 cl::desc("Enable polyhedral interface of Polly"), 161 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 162 163 static cl::opt<bool> 164 EnableDeLICM("polly-enable-delicm", 165 cl::desc("Eliminate scalar loop carried dependences"), 166 cl::Hidden, cl::init(false), cl::cat(PollyCategory)); 167 168 namespace polly { 169 void initializePollyPasses(PassRegistry &Registry) { 170 initializeCodeGenerationPass(Registry); 171 172 #ifdef GPU_CODEGEN 173 initializePPCGCodeGenerationPass(Registry); 174 #endif 175 initializeCodePreparationPass(Registry); 176 initializeDeadCodeElimPass(Registry); 177 initializeDependenceInfoPass(Registry); 178 initializeDependenceInfoWrapperPassPass(Registry); 179 initializeJSONExporterPass(Registry); 180 initializeJSONImporterPass(Registry); 181 initializeIslAstInfoPass(Registry); 182 initializeIslScheduleOptimizerPass(Registry); 183 initializePollyCanonicalizePass(Registry); 184 initializePolyhedralInfoPass(Registry); 185 initializeScopDetectionPass(Registry); 186 initializeScopInfoRegionPassPass(Registry); 187 initializeScopInfoWrapperPassPass(Registry); 188 initializeCodegenCleanupPass(Registry); 189 initializeFlattenSchedulePass(Registry); 190 initializeDeLICMPass(Registry); 191 } 192 193 /// Register Polly passes such that they form a polyhedral optimizer. 
///
/// The individual Polly passes are registered in the pass manager such that
/// they form a full polyhedral optimizer. The flow of the optimizer starts with
/// a set of preparing transformations that canonicalize the LLVM-IR such that
/// the LLVM-IR is easier for us to understand and to optimize. On the
/// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
/// static control flow regions. Those regions are then translated by the
/// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
/// optimizer is run on the polyhedral representation and finally the optimized
/// polyhedral representation is code generated back to LLVM-IR.
///
/// Besides this core functionality, we optionally schedule passes that provide
/// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
/// allow the export/import of the polyhedral representation
/// (JSON[Exporter|Importer]) or that show the CFG after code generation.
///
/// For certain parts of the Polly optimizer, several alternatives are provided:
///
/// As scheduling optimizer we support the isl scheduling optimizer
/// (http://freecode.com/projects/isl).
/// It is also possible to run Polly with no optimizer. This mode is mainly
/// provided to analyze the run and compile time changes caused by the
/// scheduling optimizer.
///
/// Polly supports the isl internal code generator.
219 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) { 220 PM.add(polly::createScopDetectionPass()); 221 222 if (PollyDetectOnly) 223 return; 224 225 if (PollyViewer) 226 PM.add(polly::createDOTViewerPass()); 227 if (PollyOnlyViewer) 228 PM.add(polly::createDOTOnlyViewerPass()); 229 if (PollyPrinter) 230 PM.add(polly::createDOTPrinterPass()); 231 if (PollyOnlyPrinter) 232 PM.add(polly::createDOTOnlyPrinterPass()); 233 234 PM.add(polly::createScopInfoRegionPassPass()); 235 if (EnablePolyhedralInfo) 236 PM.add(polly::createPolyhedralInfoPass()); 237 238 if (EnableDeLICM) 239 PM.add(polly::createDeLICMPass()); 240 241 if (ImportJScop) 242 PM.add(polly::createJSONImporterPass()); 243 244 if (DeadCodeElim) 245 PM.add(polly::createDeadCodeElimPass()); 246 247 if (Target == TARGET_GPU) { 248 // GPU generation provides its own scheduling optimization strategy. 249 } else { 250 switch (Optimizer) { 251 case OPTIMIZER_NONE: 252 break; /* Do nothing */ 253 254 case OPTIMIZER_ISL: 255 PM.add(polly::createIslScheduleOptimizerPass()); 256 break; 257 } 258 } 259 260 if (ExportJScop) 261 PM.add(polly::createJSONExporterPass()); 262 263 if (Target == TARGET_GPU) { 264 #ifdef GPU_CODEGEN 265 PM.add(polly::createPPCGCodeGenerationPass()); 266 #endif 267 } else { 268 switch (CodeGeneration) { 269 case CODEGEN_AST: 270 PM.add(polly::createIslAstInfoPass()); 271 break; 272 case CODEGEN_FULL: 273 PM.add(polly::createCodeGenerationPass()); 274 break; 275 case CODEGEN_NONE: 276 break; 277 } 278 } 279 280 // FIXME: This dummy ModulePass keeps some programs from miscompiling, 281 // probably some not correctly preserved analyses. It acts as a barrier to 282 // force all analysis results to be recomputed. 283 PM.add(createBarrierNoopPass()); 284 285 if (CFGPrinter) 286 PM.add(llvm::createCFGPrinterLegacyPassPass()); 287 288 if (Target == TARGET_GPU) { 289 // Invariant load hoisting not yet supported by GPU code generation. 
290 PollyInvariantLoadHoisting = false; 291 } 292 } 293 294 static bool shouldEnablePolly() { 295 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer) 296 PollyTrackFailures = true; 297 298 if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer || 299 ExportJScop || ImportJScop) 300 PollyEnabled = true; 301 302 return PollyEnabled; 303 } 304 305 static void 306 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder, 307 llvm::legacy::PassManagerBase &PM) { 308 if (!polly::shouldEnablePolly()) 309 return; 310 311 if (PassPosition != POSITION_EARLY) 312 return; 313 314 registerCanonicalicationPasses(PM); 315 polly::registerPollyPasses(PM); 316 } 317 318 static void 319 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder, 320 llvm::legacy::PassManagerBase &PM) { 321 if (!polly::shouldEnablePolly()) 322 return; 323 324 if (PassPosition != POSITION_AFTER_LOOPOPT) 325 return; 326 327 PM.add(polly::createCodePreparationPass()); 328 polly::registerPollyPasses(PM); 329 PM.add(createCodegenCleanupPass()); 330 } 331 332 static void 333 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder, 334 llvm::legacy::PassManagerBase &PM) { 335 if (!polly::shouldEnablePolly()) 336 return; 337 338 if (PassPosition != POSITION_BEFORE_VECTORIZER) 339 return; 340 341 PM.add(polly::createCodePreparationPass()); 342 polly::registerPollyPasses(PM); 343 PM.add(createCodegenCleanupPass()); 344 } 345 346 /// Register Polly to be available as an optimizer 347 /// 348 /// 349 /// We can currently run Polly at three different points int the pass manager. 350 /// a) very early, b) after the canonicalizing loop transformations and c) right 351 /// before the vectorizer. 352 /// 353 /// The default is currently a), to register Polly such that it runs as early as 354 /// possible. 
This has several implications: 355 /// 356 /// 1) We need to schedule more canonicalization passes 357 /// 358 /// As nothing is run before Polly, it is necessary to run a set of preparing 359 /// transformations before Polly to canonicalize the LLVM-IR and to allow 360 /// Polly to detect and understand the code. 361 /// 362 /// 2) LICM and LoopIdiom pass have not yet been run 363 /// 364 /// Loop invariant code motion as well as the loop idiom recognition pass make 365 /// it more difficult for Polly to transform code. LICM may introduce 366 /// additional data dependences that are hard to eliminate and the loop idiom 367 /// recognition pass may introduce calls to memset that we currently do not 368 /// understand. By running Polly early enough (meaning before these passes) we 369 /// avoid difficulties that may be introduced by these passes. 370 /// 371 /// 3) We get the full -O3 optimization sequence after Polly 372 /// 373 /// The LLVM-IR that is generated by Polly has been optimized on a high level, 374 /// but it may be rather inefficient on the lower/scalar level. By scheduling 375 /// Polly before all other passes, we have the full sequence of -O3 376 /// optimizations behind us, such that inefficiencies on the low level can 377 /// be optimized away. 378 /// 379 /// We are currently evaluating the benefit or running Polly at position b) or 380 /// c). b) is likely to early as it interacts with the inliner. c) is nice 381 /// as everything is fully inlined and canonicalized, but we need to be able 382 /// to handle LICMed code to make it useful. 
// Static registration objects: each one ties a callback defined above to one
// PassManagerBuilder extension point. Every callback checks PassPosition on its
// own, so for a given -polly-position value at most one of them adds passes.

// Callback for -polly-position=early.
static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
    llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
    registerPollyEarlyAsPossiblePasses);

// Callback for -polly-position=after-loopopt.
static llvm::RegisterStandardPasses
    RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
                                  registerPollyLoopOptimizerEndPasses);

// Callback for -polly-position=before-vectorizer.
static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
    llvm::PassManagerBuilder::EP_VectorizerStart,
    registerPollyScalarOptimizerLatePasses);
} // namespace polly