1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file composes the individual LLVM-IR passes provided by Polly to a
11 // functional polyhedral optimizer. The polyhedral optimizer is automatically
12 // made available to LLVM based compilers by loading the Polly shared library
13 // into such a compiler.
14 //
15 // The Polly optimizer is made available by executing a static constructor that
16 // registers the individual Polly passes in the LLVM pass manager builder. The
17 // passes are registered such that the default behaviour of the compiler is not
18 // changed, but that the flag '-polly' provided at optimization level '-O3'
19 // enables additional polyhedral optimizations.
20 //===----------------------------------------------------------------------===//
21 
22 #include "polly/RegisterPasses.h"
23 #include "polly/Canonicalization.h"
24 #include "polly/CodeGen/CodeGeneration.h"
25 #include "polly/CodeGen/CodegenCleanup.h"
26 #include "polly/CodeGen/PPCGCodeGeneration.h"
27 #include "polly/DeLICM.h"
28 #include "polly/DependenceInfo.h"
29 #include "polly/FlattenSchedule.h"
30 #include "polly/LinkAllPasses.h"
31 #include "polly/Options.h"
32 #include "polly/PolyhedralInfo.h"
33 #include "polly/ScopDetection.h"
34 #include "polly/ScopInfo.h"
35 #include "polly/Simplify.h"
36 #include "polly/Support/DumpModulePass.h"
37 #include "llvm/Analysis/CFGPrinter.h"
38 #include "llvm/IR/LegacyPassManager.h"
39 #include "llvm/Transforms/IPO.h"
40 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
41 #include "llvm/Transforms/Scalar.h"
42 #include "llvm/Transforms/Vectorize.h"
43 
44 using namespace llvm;
45 using namespace polly;
46 
47 cl::OptionCategory PollyCategory("Polly Options",
48                                  "Configure the polly loop optimizer");
49 
50 static cl::opt<bool>
51     PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
52                  cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
53 
54 static cl::opt<bool> PollyDetectOnly(
55     "polly-only-scop-detection",
56     cl::desc("Only run scop detection, but no other optimizations"),
57     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
58 
59 enum PassPositionChoice {
60   POSITION_EARLY,
61   POSITION_AFTER_LOOPOPT,
62   POSITION_BEFORE_VECTORIZER
63 };
64 
65 enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
66 
67 static cl::opt<PassPositionChoice> PassPosition(
68     "polly-position", cl::desc("Where to run polly in the pass pipeline"),
69     cl::values(
70         clEnumValN(POSITION_EARLY, "early", "Before everything"),
71         clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
72                    "After the loop optimizer (but within the inline cycle)"),
73         clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
74                    "Right before the vectorizer")),
75     cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
76     cl::cat(PollyCategory));
77 
78 static cl::opt<OptimizerChoice>
79     Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
80               cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
81                          clEnumValN(OPTIMIZER_ISL, "isl",
82                                     "The isl scheduling optimizer")),
83               cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
84               cl::cat(PollyCategory));
85 
86 enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE };
87 static cl::opt<CodeGenChoice> CodeGeneration(
88     "polly-code-generation", cl::desc("How much code-generation to perform"),
89     cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
90                clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
91                clEnumValN(CODEGEN_NONE, "none", "No code generation")),
92     cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
93 
94 enum TargetChoice { TARGET_CPU, TARGET_GPU };
95 static cl::opt<TargetChoice>
96     Target("polly-target", cl::desc("The hardware to target"),
97            cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
98 #ifdef GPU_CODEGEN
99                           ,
100                       clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
101 #endif
102                           ),
103            cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
104 
105 #ifdef GPU_CODEGEN
106 static cl::opt<GPURuntime> GPURuntimeChoice(
107     "polly-gpu-runtime", cl::desc("The GPU Runtime API to target"),
108     cl::values(clEnumValN(GPURuntime::CUDA, "libcudart",
109                           "use the CUDA Runtime API"),
110                clEnumValN(GPURuntime::OpenCL, "libopencl",
111                           "use the OpenCL Runtime API")),
112     cl::init(GPURuntime::CUDA), cl::ZeroOrMore, cl::cat(PollyCategory));
113 
114 static cl::opt<GPUArch>
115     GPUArchChoice("polly-gpu-arch", cl::desc("The GPU Architecture to target"),
116                   cl::values(clEnumValN(GPUArch::NVPTX64, "nvptx64",
117                                         "target NVIDIA 64-bit architecture")),
118                   cl::init(GPUArch::NVPTX64), cl::ZeroOrMore,
119                   cl::cat(PollyCategory));
120 #endif
121 
122 VectorizerChoice polly::PollyVectorizerChoice;
123 static cl::opt<polly::VectorizerChoice, true> Vectorizer(
124     "polly-vectorizer", cl::desc("Select the vectorization strategy"),
125     cl::values(
126         clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
127         clEnumValN(polly::VECTORIZER_POLLY, "polly",
128                    "Polly internal vectorizer"),
129         clEnumValN(
130             polly::VECTORIZER_STRIPMINE, "stripmine",
131             "Strip-mine outer loops for the loop-vectorizer to trigger")),
132     cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
133     cl::ZeroOrMore, cl::cat(PollyCategory));
134 
135 static cl::opt<bool> ImportJScop(
136     "polly-import",
137     cl::desc("Import the polyhedral description of the detected Scops"),
138     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
139 
140 static cl::opt<bool> ExportJScop(
141     "polly-export",
142     cl::desc("Export the polyhedral description of the detected Scops"),
143     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
144 
145 static cl::opt<bool> DeadCodeElim("polly-run-dce",
146                                   cl::desc("Run the dead code elimination"),
147                                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
148                                   cl::cat(PollyCategory));
149 
150 static cl::opt<bool> PollyViewer(
151     "polly-show",
152     cl::desc("Highlight the code regions that will be optimized in a "
153              "(CFG BBs and LLVM-IR instructions)"),
154     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
155 
156 static cl::opt<bool> PollyOnlyViewer(
157     "polly-show-only",
158     cl::desc("Highlight the code regions that will be optimized in "
159              "a (CFG only BBs)"),
160     cl::init(false), cl::cat(PollyCategory));
161 
162 static cl::opt<bool>
163     PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
164                  cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
165                  cl::init(false), cl::cat(PollyCategory));
166 
167 static cl::opt<bool> PollyOnlyPrinter(
168     "polly-dot-only",
169     cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
170     cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
171     cl::init(false), cl::cat(PollyCategory));
172 
173 static cl::opt<bool>
174     CFGPrinter("polly-view-cfg",
175                cl::desc("Show the Polly CFG right after code generation"),
176                cl::Hidden, cl::init(false), cl::cat(PollyCategory));
177 
178 static cl::opt<bool>
179     EnablePolyhedralInfo("polly-enable-polyhedralinfo",
180                          cl::desc("Enable polyhedral interface of Polly"),
181                          cl::Hidden, cl::init(false), cl::cat(PollyCategory));
182 
183 static cl::opt<bool>
184     DumpBefore("polly-dump-before",
185                cl::desc("Dump module before Polly transformations into a file "
186                         "suffixed with \"-before\""),
187                cl::init(false), cl::cat(PollyCategory));
188 
189 static cl::list<std::string> DumpBeforeFile(
190     "polly-dump-before-file",
191     cl::desc("Dump module before Polly transformations to the given file"),
192     cl::cat(PollyCategory));
193 
194 static cl::opt<bool>
195     DumpAfter("polly-dump-after",
196               cl::desc("Dump module after Polly transformations into a file "
197                        "suffixed with \"-after\""),
198               cl::init(false), cl::cat(PollyCategory));
199 
200 static cl::list<std::string> DumpAfterFile(
201     "polly-dump-after-file",
202     cl::desc("Dump module after Polly transformations to the given file"),
203     cl::ZeroOrMore, cl::cat(PollyCategory));
204 
205 static cl::opt<bool>
206     EnableDeLICM("polly-enable-delicm",
207                  cl::desc("Eliminate scalar loop carried dependences"),
208                  cl::Hidden, cl::init(false), cl::cat(PollyCategory));
209 
210 static cl::opt<bool>
211     EnableSimplify("polly-enable-simplify",
212                    cl::desc("Simplify SCoP after optimizations"),
213                    cl::init(false), cl::cat(PollyCategory));
214 
215 static cl::opt<bool> EnablePruneUnprofitable(
216     "polly-enable-prune-unprofitable",
217     cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden,
218     cl::init(true), cl::cat(PollyCategory));
219 
220 namespace polly {
221 void initializePollyPasses(PassRegistry &Registry) {
222   initializeCodeGenerationPass(Registry);
223 
224 #ifdef GPU_CODEGEN
225   initializePPCGCodeGenerationPass(Registry);
226 #endif
227   initializeCodePreparationPass(Registry);
228   initializeDeadCodeElimPass(Registry);
229   initializeDependenceInfoPass(Registry);
230   initializeDependenceInfoWrapperPassPass(Registry);
231   initializeJSONExporterPass(Registry);
232   initializeJSONImporterPass(Registry);
233   initializeIslAstInfoWrapperPassPass(Registry);
234   initializeIslScheduleOptimizerPass(Registry);
235   initializePollyCanonicalizePass(Registry);
236   initializePolyhedralInfoPass(Registry);
237   initializeScopDetectionWrapperPassPass(Registry);
238   initializeScopInfoRegionPassPass(Registry);
239   initializeScopInfoWrapperPassPass(Registry);
240   initializeCodegenCleanupPass(Registry);
241   initializeFlattenSchedulePass(Registry);
242   initializeDeLICMPass(Registry);
243   initializeSimplifyPass(Registry);
244   initializeDumpModulePass(Registry);
245   initializePruneUnprofitablePass(Registry);
246 }
247 
248 /// Register Polly passes such that they form a polyhedral optimizer.
249 ///
250 /// The individual Polly passes are registered in the pass manager such that
251 /// they form a full polyhedral optimizer. The flow of the optimizer starts with
252 /// a set of preparing transformations that canonicalize the LLVM-IR such that
253 /// the LLVM-IR is easier for us to understand and to optimizes. On the
254 /// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
255 /// static control flow regions. Those regions are then translated by the
256 /// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
257 /// optimizer is run on the polyhedral representation and finally the optimized
258 /// polyhedral representation is code generated back to LLVM-IR.
259 ///
260 /// Besides this core functionality, we optionally schedule passes that provide
261 /// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
262 /// allow the export/import of the polyhedral representation
263 /// (JSCON[Exporter|Importer]) or that show the cfg after code generation.
264 ///
265 /// For certain parts of the Polly optimizer, several alternatives are provided:
266 ///
267 /// As scheduling optimizer we support the isl scheduling optimizer
268 /// (http://freecode.com/projects/isl).
269 /// It is also possible to run Polly with no optimizer. This mode is mainly
270 /// provided to analyze the run and compile time changes caused by the
271 /// scheduling optimizer.
272 ///
273 /// Polly supports the isl internal code generator.
274 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
275   if (DumpBefore)
276     PM.add(polly::createDumpModulePass("-before", true));
277   for (auto &Filename : DumpBeforeFile)
278     PM.add(polly::createDumpModulePass(Filename, false));
279 
280   PM.add(polly::createScopDetectionWrapperPassPass());
281 
282   if (PollyDetectOnly)
283     return;
284 
285   if (PollyViewer)
286     PM.add(polly::createDOTViewerPass());
287   if (PollyOnlyViewer)
288     PM.add(polly::createDOTOnlyViewerPass());
289   if (PollyPrinter)
290     PM.add(polly::createDOTPrinterPass());
291   if (PollyOnlyPrinter)
292     PM.add(polly::createDOTOnlyPrinterPass());
293 
294   PM.add(polly::createScopInfoRegionPassPass());
295   if (EnablePolyhedralInfo)
296     PM.add(polly::createPolyhedralInfoPass());
297 
298   if (EnableDeLICM)
299     PM.add(polly::createDeLICMPass());
300   if (EnableSimplify)
301     PM.add(polly::createSimplifyPass());
302 
303   if (ImportJScop)
304     PM.add(polly::createJSONImporterPass());
305 
306   if (DeadCodeElim)
307     PM.add(polly::createDeadCodeElimPass());
308 
309   if (EnablePruneUnprofitable)
310     PM.add(polly::createPruneUnprofitablePass());
311 
312   if (Target == TARGET_GPU) {
313     // GPU generation provides its own scheduling optimization strategy.
314   } else {
315     switch (Optimizer) {
316     case OPTIMIZER_NONE:
317       break; /* Do nothing */
318 
319     case OPTIMIZER_ISL:
320       PM.add(polly::createIslScheduleOptimizerPass());
321       break;
322     }
323   }
324 
325   if (ExportJScop)
326     PM.add(polly::createJSONExporterPass());
327 
328   if (Target == TARGET_GPU) {
329 #ifdef GPU_CODEGEN
330     PM.add(
331         polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
332 #endif
333   } else {
334     switch (CodeGeneration) {
335     case CODEGEN_AST:
336       PM.add(polly::createIslAstInfoWrapperPassPass());
337       break;
338     case CODEGEN_FULL:
339       PM.add(polly::createCodeGenerationPass());
340       break;
341     case CODEGEN_NONE:
342       break;
343     }
344   }
345 
346   // FIXME: This dummy ModulePass keeps some programs from miscompiling,
347   // probably some not correctly preserved analyses. It acts as a barrier to
348   // force all analysis results to be recomputed.
349   PM.add(createBarrierNoopPass());
350 
351   if (DumpAfter)
352     PM.add(polly::createDumpModulePass("-after", true));
353   for (auto &Filename : DumpAfterFile)
354     PM.add(polly::createDumpModulePass(Filename, false));
355 
356   if (CFGPrinter)
357     PM.add(llvm::createCFGPrinterLegacyPassPass());
358 }
359 
360 static bool shouldEnablePolly() {
361   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
362     PollyTrackFailures = true;
363 
364   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
365       ExportJScop || ImportJScop)
366     PollyEnabled = true;
367 
368   return PollyEnabled;
369 }
370 
371 static void
372 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
373                                    llvm::legacy::PassManagerBase &PM) {
374   if (!polly::shouldEnablePolly())
375     return;
376 
377   if (PassPosition != POSITION_EARLY)
378     return;
379 
380   registerCanonicalicationPasses(PM);
381   polly::registerPollyPasses(PM);
382 }
383 
384 static void
385 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
386                                     llvm::legacy::PassManagerBase &PM) {
387   if (!polly::shouldEnablePolly())
388     return;
389 
390   if (PassPosition != POSITION_AFTER_LOOPOPT)
391     return;
392 
393   PM.add(polly::createCodePreparationPass());
394   polly::registerPollyPasses(PM);
395   PM.add(createCodegenCleanupPass());
396 }
397 
398 static void
399 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
400                                        llvm::legacy::PassManagerBase &PM) {
401   if (!polly::shouldEnablePolly())
402     return;
403 
404   if (PassPosition != POSITION_BEFORE_VECTORIZER)
405     return;
406 
407   PM.add(polly::createCodePreparationPass());
408   polly::registerPollyPasses(PM);
409   PM.add(createCodegenCleanupPass());
410 }
411 
412 /// Register Polly to be available as an optimizer
413 ///
414 ///
415 /// We can currently run Polly at three different points int the pass manager.
416 /// a) very early, b) after the canonicalizing loop transformations and c) right
417 /// before the vectorizer.
418 ///
419 /// The default is currently a), to register Polly such that it runs as early as
420 /// possible. This has several implications:
421 ///
422 ///   1) We need to schedule more canonicalization passes
423 ///
424 ///   As nothing is run before Polly, it is necessary to run a set of preparing
425 ///   transformations before Polly to canonicalize the LLVM-IR and to allow
426 ///   Polly to detect and understand the code.
427 ///
428 ///   2) LICM and LoopIdiom pass have not yet been run
429 ///
430 ///   Loop invariant code motion as well as the loop idiom recognition pass make
431 ///   it more difficult for Polly to transform code. LICM may introduce
432 ///   additional data dependences that are hard to eliminate and the loop idiom
433 ///   recognition pass may introduce calls to memset that we currently do not
434 ///   understand. By running Polly early enough (meaning before these passes) we
435 ///   avoid difficulties that may be introduced by these passes.
436 ///
437 ///   3) We get the full -O3 optimization sequence after Polly
438 ///
439 ///   The LLVM-IR that is generated by Polly has been optimized on a high level,
440 ///   but it may be rather inefficient on the lower/scalar level. By scheduling
441 ///   Polly before all other passes, we have the full sequence of -O3
442 ///   optimizations behind us, such that inefficiencies on the low level can
443 ///   be optimized away.
444 ///
445 /// We are currently evaluating the benefit or running Polly at position b) or
446 /// c). b) is likely to early as it interacts with the inliner. c) is nice
447 /// as everything is fully inlined and canonicalized, but we need to be able
448 /// to handle LICMed code to make it useful.
449 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
450     llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
451     registerPollyEarlyAsPossiblePasses);
452 
453 static llvm::RegisterStandardPasses
454     RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
455                                   registerPollyLoopOptimizerEndPasses);
456 
457 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
458     llvm::PassManagerBuilder::EP_VectorizerStart,
459     registerPollyScalarOptimizerLatePasses);
460 } // namespace polly
461