1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file composes the individual LLVM-IR passes provided by Polly to a
11 // functional polyhedral optimizer. The polyhedral optimizer is automatically
12 // made available to LLVM based compilers by loading the Polly shared library
13 // into such a compiler.
14 //
15 // The Polly optimizer is made available by executing a static constructor that
16 // registers the individual Polly passes in the LLVM pass manager builder. The
17 // passes are registered such that the default behaviour of the compiler is not
18 // changed, but that the flag '-polly' provided at optimization level '-O3'
19 // enables additional polyhedral optimizations.
20 //===----------------------------------------------------------------------===//
21 
22 #include "polly/RegisterPasses.h"
23 #include "polly/Canonicalization.h"
24 #include "polly/CodeGen/CodeGeneration.h"
25 #include "polly/CodeGen/CodegenCleanup.h"
26 #include "polly/CodeGen/PPCGCodeGeneration.h"
27 #include "polly/DeLICM.h"
28 #include "polly/DependenceInfo.h"
29 #include "polly/FlattenSchedule.h"
30 #include "polly/LinkAllPasses.h"
31 #include "polly/Options.h"
32 #include "polly/PolyhedralInfo.h"
33 #include "polly/ScopDetection.h"
34 #include "polly/ScopInfo.h"
35 #include "polly/Simplify.h"
36 #include "polly/Support/DumpModulePass.h"
37 #include "llvm/Analysis/CFGPrinter.h"
38 #include "llvm/IR/LegacyPassManager.h"
39 #include "llvm/Support/TargetSelect.h"
40 #include "llvm/Transforms/IPO.h"
41 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
42 #include "llvm/Transforms/Scalar.h"
43 #include "llvm/Transforms/Vectorize.h"
44 
45 using namespace llvm;
46 using namespace polly;
47 
48 cl::OptionCategory PollyCategory("Polly Options",
49                                  "Configure the polly loop optimizer");
50 
51 static cl::opt<bool>
52     PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
53                  cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
54 
55 static cl::opt<bool> PollyDetectOnly(
56     "polly-only-scop-detection",
57     cl::desc("Only run scop detection, but no other optimizations"),
58     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
59 
60 enum PassPositionChoice {
61   POSITION_EARLY,
62   POSITION_AFTER_LOOPOPT,
63   POSITION_BEFORE_VECTORIZER
64 };
65 
66 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
67 
68 static cl::opt<PassPositionChoice> PassPosition(
69     "polly-position", cl::desc("Where to run polly in the pass pipeline"),
70     cl::values(
71         clEnumValN(POSITION_EARLY, "early", "Before everything"),
72         clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
73                    "After the loop optimizer (but within the inline cycle)"),
74         clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
75                    "Right before the vectorizer")),
76     cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
77     cl::cat(PollyCategory));
78 
79 static cl::opt<OptimizerChoice>
80     Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
81               cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
82                          clEnumValN(OPTIMIZER_ISL, "isl",
83                                     "The isl scheduling optimizer")),
84               cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
85               cl::cat(PollyCategory));
86 
87 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE };
88 static cl::opt<CodeGenChoice> CodeGeneration(
89     "polly-code-generation", cl::desc("How much code-generation to perform"),
90     cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
91                clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
92                clEnumValN(CODEGEN_NONE, "none", "No code generation")),
93     cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
94 
95 enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID };
96 static cl::opt<TargetChoice>
97     Target("polly-target", cl::desc("The hardware to target"),
98            cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
99 #ifdef GPU_CODEGEN
100                           ,
101                       clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
102                       clEnumValN(TARGET_HYBRID, "hybrid",
103                                  "generate GPU code (preferably) or CPU code")
104 #endif
105                           ),
106            cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
107 
#ifdef GPU_CODEGEN
/// -polly-gpu-runtime: runtime API passed to the PPCG code generation pass.
/// Defaults to GPURuntime::CUDA.
static cl::opt<GPURuntime> GPURuntimeChoice(
    "polly-gpu-runtime", cl::desc("The GPU Runtime API to target"),
    cl::values(
        clEnumValN(GPURuntime::CUDA, "libcudart", "use the CUDA Runtime API"),
        clEnumValN(GPURuntime::OpenCL, "libopencl",
                   "use the OpenCL Runtime API")),
    cl::init(GPURuntime::CUDA), cl::cat(PollyCategory), cl::ZeroOrMore);

/// -polly-gpu-arch: architecture passed to the PPCG code generation pass.
/// NVPTX64 is the only value offered and is the default.
static cl::opt<GPUArch> GPUArchChoice(
    "polly-gpu-arch", cl::desc("The GPU Architecture to target"),
    cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64",
                          "target NVIDIA 64-bit architecture")),
    cl::init(GPUArch::NVPTX64), cl::cat(PollyCategory), cl::ZeroOrMore);
#endif
124 
125 VectorizerChoice polly::PollyVectorizerChoice;
126 static cl::opt<polly::VectorizerChoice, true> Vectorizer(
127     "polly-vectorizer", cl::desc("Select the vectorization strategy"),
128     cl::values(
129         clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
130         clEnumValN(polly::VECTORIZER_POLLY, "polly",
131                    "Polly internal vectorizer"),
132         clEnumValN(
133             polly::VECTORIZER_STRIPMINE, "stripmine",
134             "Strip-mine outer loops for the loop-vectorizer to trigger")),
135     cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
136     cl::ZeroOrMore, cl::cat(PollyCategory));
137 
138 static cl::opt<bool> ImportJScop(
139     "polly-import",
140     cl::desc("Import the polyhedral description of the detected Scops"),
141     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
142 
143 static cl::opt<bool> ExportJScop(
144     "polly-export",
145     cl::desc("Export the polyhedral description of the detected Scops"),
146     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
147 
148 static cl::opt<bool> DeadCodeElim("polly-run-dce",
149                                   cl::desc("Run the dead code elimination"),
150                                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
151                                   cl::cat(PollyCategory));
152 
153 static cl::opt<bool> PollyViewer(
154     "polly-show",
155     cl::desc("Highlight the code regions that will be optimized in a "
156              "(CFG BBs and LLVM-IR instructions)"),
157     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
158 
159 static cl::opt<bool> PollyOnlyViewer(
160     "polly-show-only",
161     cl::desc("Highlight the code regions that will be optimized in "
162              "a (CFG only BBs)"),
163     cl::init(false), cl::cat(PollyCategory));
164 
165 static cl::opt<bool>
166     PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
167                  cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
168                  cl::init(false), cl::cat(PollyCategory));
169 
170 static cl::opt<bool> PollyOnlyPrinter(
171     "polly-dot-only",
172     cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
173     cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
174     cl::init(false), cl::cat(PollyCategory));
175 
176 static cl::opt<bool>
177     CFGPrinter("polly-view-cfg",
178                cl::desc("Show the Polly CFG right after code generation"),
179                cl::Hidden, cl::init(false), cl::cat(PollyCategory));
180 
181 static cl::opt<bool>
182     EnablePolyhedralInfo("polly-enable-polyhedralinfo",
183                          cl::desc("Enable polyhedral interface of Polly"),
184                          cl::Hidden, cl::init(false), cl::cat(PollyCategory));
185 
186 static cl::opt<bool>
187     DumpBefore("polly-dump-before",
188                cl::desc("Dump module before Polly transformations into a file "
189                         "suffixed with \"-before\""),
190                cl::init(false), cl::cat(PollyCategory));
191 
192 static cl::list<std::string> DumpBeforeFile(
193     "polly-dump-before-file",
194     cl::desc("Dump module before Polly transformations to the given file"),
195     cl::cat(PollyCategory));
196 
197 static cl::opt<bool>
198     DumpAfter("polly-dump-after",
199               cl::desc("Dump module after Polly transformations into a file "
200                        "suffixed with \"-after\""),
201               cl::init(false), cl::cat(PollyCategory));
202 
203 static cl::list<std::string> DumpAfterFile(
204     "polly-dump-after-file",
205     cl::desc("Dump module after Polly transformations to the given file"),
206     cl::ZeroOrMore, cl::cat(PollyCategory));
207 
208 static cl::opt<bool>
209     EnableDeLICM("polly-enable-delicm",
210                  cl::desc("Eliminate scalar loop carried dependences"),
211                  cl::Hidden, cl::init(false), cl::cat(PollyCategory));
212 
213 static cl::opt<bool>
214     EnableSimplify("polly-enable-simplify",
215                    cl::desc("Simplify SCoP after optimizations"),
216                    cl::init(false), cl::cat(PollyCategory));
217 
218 static cl::opt<bool> EnablePruneUnprofitable(
219     "polly-enable-prune-unprofitable",
220     cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden,
221     cl::init(true), cl::cat(PollyCategory));
222 
223 namespace polly {
224 void initializePollyPasses(PassRegistry &Registry) {
225   initializeCodeGenerationPass(Registry);
226 
227 #ifdef GPU_CODEGEN
228   initializePPCGCodeGenerationPass(Registry);
229   LLVMInitializeNVPTXTarget();
230   LLVMInitializeNVPTXTargetInfo();
231   LLVMInitializeNVPTXTargetMC();
232   LLVMInitializeNVPTXAsmPrinter();
233 #endif
234   initializeCodePreparationPass(Registry);
235   initializeDeadCodeElimPass(Registry);
236   initializeDependenceInfoPass(Registry);
237   initializeDependenceInfoWrapperPassPass(Registry);
238   initializeJSONExporterPass(Registry);
239   initializeJSONImporterPass(Registry);
240   initializeIslAstInfoWrapperPassPass(Registry);
241   initializeIslScheduleOptimizerPass(Registry);
242   initializePollyCanonicalizePass(Registry);
243   initializePolyhedralInfoPass(Registry);
244   initializeScopDetectionWrapperPassPass(Registry);
245   initializeScopInfoRegionPassPass(Registry);
246   initializeScopInfoWrapperPassPass(Registry);
247   initializeCodegenCleanupPass(Registry);
248   initializeFlattenSchedulePass(Registry);
249   initializeDeLICMPass(Registry);
250   initializeSimplifyPass(Registry);
251   initializeDumpModulePass(Registry);
252   initializePruneUnprofitablePass(Registry);
253 }
254 
255 /// Register Polly passes such that they form a polyhedral optimizer.
256 ///
257 /// The individual Polly passes are registered in the pass manager such that
258 /// they form a full polyhedral optimizer. The flow of the optimizer starts with
259 /// a set of preparing transformations that canonicalize the LLVM-IR such that
260 /// the LLVM-IR is easier for us to understand and to optimizes. On the
261 /// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
262 /// static control flow regions. Those regions are then translated by the
263 /// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
264 /// optimizer is run on the polyhedral representation and finally the optimized
265 /// polyhedral representation is code generated back to LLVM-IR.
266 ///
267 /// Besides this core functionality, we optionally schedule passes that provide
268 /// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
269 /// allow the export/import of the polyhedral representation
270 /// (JSCON[Exporter|Importer]) or that show the cfg after code generation.
271 ///
272 /// For certain parts of the Polly optimizer, several alternatives are provided:
273 ///
274 /// As scheduling optimizer we support the isl scheduling optimizer
275 /// (http://freecode.com/projects/isl).
276 /// It is also possible to run Polly with no optimizer. This mode is mainly
277 /// provided to analyze the run and compile time changes caused by the
278 /// scheduling optimizer.
279 ///
280 /// Polly supports the isl internal code generator.
281 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
282   if (DumpBefore)
283     PM.add(polly::createDumpModulePass("-before", true));
284   for (auto &Filename : DumpBeforeFile)
285     PM.add(polly::createDumpModulePass(Filename, false));
286 
287   PM.add(polly::createScopDetectionWrapperPassPass());
288 
289   if (PollyDetectOnly)
290     return;
291 
292   if (PollyViewer)
293     PM.add(polly::createDOTViewerPass());
294   if (PollyOnlyViewer)
295     PM.add(polly::createDOTOnlyViewerPass());
296   if (PollyPrinter)
297     PM.add(polly::createDOTPrinterPass());
298   if (PollyOnlyPrinter)
299     PM.add(polly::createDOTOnlyPrinterPass());
300 
301   PM.add(polly::createScopInfoRegionPassPass());
302   if (EnablePolyhedralInfo)
303     PM.add(polly::createPolyhedralInfoPass());
304 
305   if (EnableDeLICM)
306     PM.add(polly::createDeLICMPass());
307   if (EnableSimplify)
308     PM.add(polly::createSimplifyPass());
309 
310   if (ImportJScop)
311     PM.add(polly::createJSONImporterPass());
312 
313   if (DeadCodeElim)
314     PM.add(polly::createDeadCodeElimPass());
315 
316   if (EnablePruneUnprofitable)
317     PM.add(polly::createPruneUnprofitablePass());
318 
319 #ifdef GPU_CODEGEN
320   if (Target == TARGET_HYBRID)
321     PM.add(
322         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
323 #endif
324   if (Target == TARGET_CPU || Target == TARGET_HYBRID)
325     switch (Optimizer) {
326     case OPTIMIZER_NONE:
327       break; /* Do nothing */
328 
329     case OPTIMIZER_ISL:
330       PM.add(polly::createIslScheduleOptimizerPass());
331       break;
332     }
333 
334   if (ExportJScop)
335     PM.add(polly::createJSONExporterPass());
336 
337   if (Target == TARGET_CPU || Target == TARGET_HYBRID)
338     switch (CodeGeneration) {
339     case CODEGEN_AST:
340       PM.add(polly::createIslAstInfoWrapperPassPass());
341       break;
342     case CODEGEN_FULL:
343       PM.add(polly::createCodeGenerationPass());
344       break;
345     case CODEGEN_NONE:
346       break;
347     }
348 #ifdef GPU_CODEGEN
349   else
350     PM.add(
351         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
352 #endif
353 
354   // FIXME: This dummy ModulePass keeps some programs from miscompiling,
355   // probably some not correctly preserved analyses. It acts as a barrier to
356   // force all analysis results to be recomputed.
357   PM.add(createBarrierNoopPass());
358 
359   if (DumpAfter)
360     PM.add(polly::createDumpModulePass("-after", true));
361   for (auto &Filename : DumpAfterFile)
362     PM.add(polly::createDumpModulePass(Filename, false));
363 
364   if (CFGPrinter)
365     PM.add(llvm::createCFGPrinterLegacyPassPass());
366 }
367 
368 static bool shouldEnablePolly() {
369   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
370     PollyTrackFailures = true;
371 
372   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
373       ExportJScop || ImportJScop)
374     PollyEnabled = true;
375 
376   return PollyEnabled;
377 }
378 
379 static void
380 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
381                                    llvm::legacy::PassManagerBase &PM) {
382   if (!polly::shouldEnablePolly())
383     return;
384 
385   if (PassPosition != POSITION_EARLY)
386     return;
387 
388   registerCanonicalicationPasses(PM);
389   polly::registerPollyPasses(PM);
390 }
391 
392 static void
393 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
394                                     llvm::legacy::PassManagerBase &PM) {
395   if (!polly::shouldEnablePolly())
396     return;
397 
398   if (PassPosition != POSITION_AFTER_LOOPOPT)
399     return;
400 
401   PM.add(polly::createCodePreparationPass());
402   polly::registerPollyPasses(PM);
403   PM.add(createCodegenCleanupPass());
404 }
405 
406 static void
407 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
408                                        llvm::legacy::PassManagerBase &PM) {
409   if (!polly::shouldEnablePolly())
410     return;
411 
412   if (PassPosition != POSITION_BEFORE_VECTORIZER)
413     return;
414 
415   PM.add(polly::createCodePreparationPass());
416   polly::registerPollyPasses(PM);
417   PM.add(createCodegenCleanupPass());
418 }
419 
420 /// Register Polly to be available as an optimizer
421 ///
422 ///
423 /// We can currently run Polly at three different points int the pass manager.
424 /// a) very early, b) after the canonicalizing loop transformations and c) right
425 /// before the vectorizer.
426 ///
427 /// The default is currently a), to register Polly such that it runs as early as
428 /// possible. This has several implications:
429 ///
430 ///   1) We need to schedule more canonicalization passes
431 ///
432 ///   As nothing is run before Polly, it is necessary to run a set of preparing
433 ///   transformations before Polly to canonicalize the LLVM-IR and to allow
434 ///   Polly to detect and understand the code.
435 ///
436 ///   2) LICM and LoopIdiom pass have not yet been run
437 ///
438 ///   Loop invariant code motion as well as the loop idiom recognition pass make
439 ///   it more difficult for Polly to transform code. LICM may introduce
440 ///   additional data dependences that are hard to eliminate and the loop idiom
441 ///   recognition pass may introduce calls to memset that we currently do not
442 ///   understand. By running Polly early enough (meaning before these passes) we
443 ///   avoid difficulties that may be introduced by these passes.
444 ///
445 ///   3) We get the full -O3 optimization sequence after Polly
446 ///
447 ///   The LLVM-IR that is generated by Polly has been optimized on a high level,
448 ///   but it may be rather inefficient on the lower/scalar level. By scheduling
449 ///   Polly before all other passes, we have the full sequence of -O3
450 ///   optimizations behind us, such that inefficiencies on the low level can
451 ///   be optimized away.
452 ///
453 /// We are currently evaluating the benefit or running Polly at position b) or
454 /// c). b) is likely to early as it interacts with the inliner. c) is nice
455 /// as everything is fully inlined and canonicalized, but we need to be able
456 /// to handle LICMed code to make it useful.
457 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
458     llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
459     registerPollyEarlyAsPossiblePasses);
460 
461 static llvm::RegisterStandardPasses
462     RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
463                                   registerPollyLoopOptimizerEndPasses);
464 
465 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
466     llvm::PassManagerBuilder::EP_VectorizerStart,
467     registerPollyScalarOptimizerLatePasses);
468 } // namespace polly
469