1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file composes the individual LLVM-IR passes provided by Polly to a
11 // functional polyhedral optimizer. The polyhedral optimizer is automatically
12 // made available to LLVM based compilers by loading the Polly shared library
13 // into such a compiler.
14 //
15 // The Polly optimizer is made available by executing a static constructor that
16 // registers the individual Polly passes in the LLVM pass manager builder. The
17 // passes are registered such that the default behaviour of the compiler is not
18 // changed, but that the flag '-polly' provided at optimization level '-O3'
19 // enables additional polyhedral optimizations.
20 //===----------------------------------------------------------------------===//
21 
22 #include "polly/RegisterPasses.h"
23 #include "polly/Canonicalization.h"
24 #include "polly/CodeGen/CodeGeneration.h"
25 #include "polly/CodeGen/CodegenCleanup.h"
26 #include "polly/DeLICM.h"
27 #include "polly/DependenceInfo.h"
28 #include "polly/FlattenSchedule.h"
29 #include "polly/LinkAllPasses.h"
30 #include "polly/Options.h"
31 #include "polly/PolyhedralInfo.h"
32 #include "polly/ScopDetection.h"
33 #include "polly/ScopInfo.h"
34 #include "llvm/Analysis/CFGPrinter.h"
35 #include "llvm/IR/LegacyPassManager.h"
36 #include "llvm/Transforms/IPO.h"
37 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
38 #include "llvm/Transforms/Scalar.h"
39 #include "llvm/Transforms/Vectorize.h"
40 
41 using namespace llvm;
42 using namespace polly;
43 
/// Option category that the Polly command-line options defined in this file
/// attach to via cl::cat(PollyCategory). It is not declared static, so it has
/// external linkage.
cl::OptionCategory PollyCategory("Polly Options",
                                 "Configure the polly loop optimizer");
46 
/// Flag `-polly` (initially false). shouldEnablePolly() returns its value and
/// also forces it to true when a viewer/printer or JSCoP import/export option
/// is set.
static cl::opt<bool>
    PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
50 
/// Flag `-polly-only-scop-detection` (initially false). When set,
/// registerPollyPasses() returns right after adding the scop detection pass.
static cl::opt<bool> PollyDetectOnly(
    "polly-only-scop-detection",
    cl::desc("Only run scop detection, but no other optimizations"),
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
55 
/// Values accepted by the `-polly-position` option; each one selects which of
/// the three pass-manager-builder callbacks in this file schedules Polly.
enum PassPositionChoice {
  POSITION_EARLY,            // "early": before everything
  POSITION_AFTER_LOOPOPT,    // "after-loopopt": after the loop optimizer
  POSITION_BEFORE_VECTORIZER // "before-vectorizer": right before the vectorizer
};
61 
/// Values accepted by the `-polly-optimizer` option: no scheduling optimizer,
/// or the isl scheduling optimizer.
enum OptimizerChoice { OPTIMIZER_NONE, OPTIMIZER_ISL };
63 
/// Hidden option `-polly-position` (initially POSITION_EARLY). Each of the
/// registerPolly*Passes callbacks below compares it against its own position
/// and returns without adding passes on a mismatch.
static cl::opt<PassPositionChoice> PassPosition(
    "polly-position", cl::desc("Where to run polly in the pass pipeline"),
    cl::values(
        clEnumValN(POSITION_EARLY, "early", "Before everything"),
        clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
                   "After the loop optimizer (but within the inline cycle)"),
        clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
                   "Right before the vectorizer")),
    cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
    cl::cat(PollyCategory));
74 
/// Hidden option `-polly-optimizer` (initially OPTIMIZER_ISL). It is consulted
/// by registerPollyPasses() only when the target is not the GPU.
static cl::opt<OptimizerChoice>
    Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
              cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
                         clEnumValN(OPTIMIZER_ISL, "isl",
                                    "The isl scheduling optimizer")),
              cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
              cl::cat(PollyCategory));
82 
/// Values accepted by `-polly-code-generation`: AST and IR generation, AST
/// generation only, or no code generation at all.
enum CodeGenChoice { CODEGEN_FULL, CODEGEN_AST, CODEGEN_NONE };
/// Hidden option `-polly-code-generation` (initially CODEGEN_FULL). It is
/// consulted by registerPollyPasses() only when the target is not the GPU.
static cl::opt<CodeGenChoice> CodeGeneration(
    "polly-code-generation", cl::desc("How much code-generation to perform"),
    cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
               clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
               clEnumValN(CODEGEN_NONE, "none", "No code generation")),
    cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
90 
/// Hardware targets selectable through `-polly-target`.
enum TargetChoice { TARGET_CPU, TARGET_GPU };
/// Option `-polly-target` (initially TARGET_CPU). The "gpu" value is only
/// offered on the command line when GPU_CODEGEN is defined at compile time.
static cl::opt<TargetChoice>
    Target("polly-target", cl::desc("The hardware to target"),
           cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
#ifdef GPU_CODEGEN
                          ,
                      clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
#endif
                          ),
           cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
101 
/// Storage for the selected vectorization strategy; the `-polly-vectorizer`
/// option below is bound to it through cl::location.
VectorizerChoice polly::PollyVectorizerChoice;
/// Option `-polly-vectorizer` (initially polly::VECTORIZER_NONE). It selects
/// between no vectorization, Polly's internal vectorizer, and strip-mining.
static cl::opt<polly::VectorizerChoice, true> Vectorizer(
    "polly-vectorizer", cl::desc("Select the vectorization strategy"),
    cl::values(
        clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
        clEnumValN(polly::VECTORIZER_POLLY, "polly",
                   "Polly internal vectorizer"),
        clEnumValN(
            polly::VECTORIZER_STRIPMINE, "stripmine",
            "Strip-mine outer loops for the loop-vectorizer to trigger")),
    cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
    cl::ZeroOrMore, cl::cat(PollyCategory));
114 
115 static cl::opt<bool> ImportJScop(
116     "polly-import",
117     cl::desc("Export the polyhedral description of the detected Scops"),
118     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
119 
/// Hidden flag `-polly-export` (initially false). When set,
/// registerPollyPasses() schedules the JSON exporter pass and
/// shouldEnablePolly() implicitly enables Polly.
static cl::opt<bool> ExportJScop(
    "polly-export",
    cl::desc("Export the polyhedral description of the detected Scops"),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
124 
/// Hidden flag `-polly-run-dce` (initially false). When set,
/// registerPollyPasses() schedules Polly's dead code elimination pass.
static cl::opt<bool> DeadCodeElim("polly-run-dce",
                                  cl::desc("Run the dead code elimination"),
                                  cl::Hidden, cl::init(false), cl::ZeroOrMore,
                                  cl::cat(PollyCategory));
129 
/// Flag `-polly-show` (initially false). When set, registerPollyPasses()
/// schedules the DOT viewer pass; shouldEnablePolly() then also enables Polly
/// and turns on PollyTrackFailures.
static cl::opt<bool> PollyViewer(
    "polly-show",
    cl::desc("Highlight the code regions that will be optimized in a "
             "(CFG BBs and LLVM-IR instructions)"),
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
135 
/// Flag `-polly-show-only` (initially false). When set, registerPollyPasses()
/// schedules the DOT-only viewer pass; shouldEnablePolly() then also enables
/// Polly and turns on PollyTrackFailures.
static cl::opt<bool> PollyOnlyViewer(
    "polly-show-only",
    cl::desc("Highlight the code regions that will be optimized in "
             "a (CFG only BBs)"),
    cl::init(false), cl::cat(PollyCategory));
141 
/// Hidden flag `-polly-dot` (initially false). When set, registerPollyPasses()
/// schedules the DOT printer pass; shouldEnablePolly() then also enables Polly
/// and turns on PollyTrackFailures.
static cl::opt<bool>
    PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
                 cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
                 cl::init(false), cl::cat(PollyCategory));
146 
147 static cl::opt<bool> PollyOnlyPrinter(
148     "polly-dot-only",
149     cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
150     cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
151     cl::init(false), cl::cat(PollyCategory));
152 
/// Hidden flag `-polly-view-cfg` (initially false). When set,
/// registerPollyPasses() adds the legacy CFG printer pass after the barrier
/// pass that follows code generation.
static cl::opt<bool>
    CFGPrinter("polly-view-cfg",
               cl::desc("Show the Polly CFG right after code generation"),
               cl::Hidden, cl::init(false), cl::cat(PollyCategory));
157 
/// Hidden flag `-polly-enable-polyhedralinfo` (initially false). When set,
/// registerPollyPasses() schedules the PolyhedralInfo pass right after the
/// ScopInfo region pass.
static cl::opt<bool>
    EnablePolyhedralInfo("polly-enable-polyhedralinfo",
                         cl::desc("Enable polyhedral interface of Polly"),
                         cl::Hidden, cl::init(false), cl::cat(PollyCategory));
162 
/// Hidden flag `-polly-enable-delicm` (initially false). When set,
/// registerPollyPasses() schedules the DeLICM pass.
static cl::opt<bool>
    EnableDeLICM("polly-enable-delicm",
                 cl::desc("Eliminate scalar loop carried dependences"),
                 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
167 
168 namespace polly {
/// Initialize every Polly pass with the given pass registry.
///
/// Calls the initialize*Pass function of each Polly pass on \p Registry. The
/// PPCG (GPU) code generation pass is only included when GPU_CODEGEN is
/// defined at compile time.
///
/// \param Registry The pass registry handed to each initialize*Pass call.
void initializePollyPasses(PassRegistry &Registry) {
  initializeCodeGenerationPass(Registry);

#ifdef GPU_CODEGEN
  initializePPCGCodeGenerationPass(Registry);
#endif
  initializeCodePreparationPass(Registry);
  initializeDeadCodeElimPass(Registry);
  initializeDependenceInfoPass(Registry);
  initializeDependenceInfoWrapperPassPass(Registry);
  initializeJSONExporterPass(Registry);
  initializeJSONImporterPass(Registry);
  initializeIslAstInfoPass(Registry);
  initializeIslScheduleOptimizerPass(Registry);
  initializePollyCanonicalizePass(Registry);
  initializePolyhedralInfoPass(Registry);
  initializeScopDetectionPass(Registry);
  initializeScopInfoRegionPassPass(Registry);
  initializeScopInfoWrapperPassPass(Registry);
  initializeCodegenCleanupPass(Registry);
  initializeFlattenSchedulePass(Registry);
  initializeDeLICMPass(Registry);
}
192 
193 /// Register Polly passes such that they form a polyhedral optimizer.
194 ///
195 /// The individual Polly passes are registered in the pass manager such that
196 /// they form a full polyhedral optimizer. The flow of the optimizer starts with
197 /// a set of preparing transformations that canonicalize the LLVM-IR such that
198 /// the LLVM-IR is easier for us to understand and to optimizes. On the
199 /// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
200 /// static control flow regions. Those regions are then translated by the
201 /// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
202 /// optimizer is run on the polyhedral representation and finally the optimized
203 /// polyhedral representation is code generated back to LLVM-IR.
204 ///
205 /// Besides this core functionality, we optionally schedule passes that provide
206 /// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
207 /// allow the export/import of the polyhedral representation
208 /// (JSCON[Exporter|Importer]) or that show the cfg after code generation.
209 ///
210 /// For certain parts of the Polly optimizer, several alternatives are provided:
211 ///
212 /// As scheduling optimizer we support the isl scheduling optimizer
213 /// (http://freecode.com/projects/isl).
214 /// It is also possible to run Polly with no optimizer. This mode is mainly
215 /// provided to analyze the run and compile time changes caused by the
216 /// scheduling optimizer.
217 ///
218 /// Polly supports the isl internal code generator.
219 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
220   PM.add(polly::createScopDetectionPass());
221 
222   if (PollyDetectOnly)
223     return;
224 
225   if (PollyViewer)
226     PM.add(polly::createDOTViewerPass());
227   if (PollyOnlyViewer)
228     PM.add(polly::createDOTOnlyViewerPass());
229   if (PollyPrinter)
230     PM.add(polly::createDOTPrinterPass());
231   if (PollyOnlyPrinter)
232     PM.add(polly::createDOTOnlyPrinterPass());
233 
234   PM.add(polly::createScopInfoRegionPassPass());
235   if (EnablePolyhedralInfo)
236     PM.add(polly::createPolyhedralInfoPass());
237 
238   if (EnableDeLICM)
239     PM.add(polly::createDeLICMPass());
240 
241   if (ImportJScop)
242     PM.add(polly::createJSONImporterPass());
243 
244   if (DeadCodeElim)
245     PM.add(polly::createDeadCodeElimPass());
246 
247   if (Target == TARGET_GPU) {
248     // GPU generation provides its own scheduling optimization strategy.
249   } else {
250     switch (Optimizer) {
251     case OPTIMIZER_NONE:
252       break; /* Do nothing */
253 
254     case OPTIMIZER_ISL:
255       PM.add(polly::createIslScheduleOptimizerPass());
256       break;
257     }
258   }
259 
260   if (ExportJScop)
261     PM.add(polly::createJSONExporterPass());
262 
263   if (Target == TARGET_GPU) {
264 #ifdef GPU_CODEGEN
265     PM.add(polly::createPPCGCodeGenerationPass());
266 #endif
267   } else {
268     switch (CodeGeneration) {
269     case CODEGEN_AST:
270       PM.add(polly::createIslAstInfoPass());
271       break;
272     case CODEGEN_FULL:
273       PM.add(polly::createCodeGenerationPass());
274       break;
275     case CODEGEN_NONE:
276       break;
277     }
278   }
279 
280   // FIXME: This dummy ModulePass keeps some programs from miscompiling,
281   // probably some not correctly preserved analyses. It acts as a barrier to
282   // force all analysis results to be recomputed.
283   PM.add(createBarrierNoopPass());
284 
285   if (CFGPrinter)
286     PM.add(llvm::createCFGPrinterLegacyPassPass());
287 
288   if (Target == TARGET_GPU) {
289     // Invariant load hoisting not yet supported by GPU code generation.
290     PollyInvariantLoadHoisting = false;
291   }
292 }
293 
294 static bool shouldEnablePolly() {
295   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
296     PollyTrackFailures = true;
297 
298   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
299       ExportJScop || ImportJScop)
300     PollyEnabled = true;
301 
302   return PollyEnabled;
303 }
304 
305 static void
306 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
307                                    llvm::legacy::PassManagerBase &PM) {
308   if (!polly::shouldEnablePolly())
309     return;
310 
311   if (PassPosition != POSITION_EARLY)
312     return;
313 
314   registerCanonicalicationPasses(PM);
315   polly::registerPollyPasses(PM);
316 }
317 
318 static void
319 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
320                                     llvm::legacy::PassManagerBase &PM) {
321   if (!polly::shouldEnablePolly())
322     return;
323 
324   if (PassPosition != POSITION_AFTER_LOOPOPT)
325     return;
326 
327   PM.add(polly::createCodePreparationPass());
328   polly::registerPollyPasses(PM);
329   PM.add(createCodegenCleanupPass());
330 }
331 
332 static void
333 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
334                                        llvm::legacy::PassManagerBase &PM) {
335   if (!polly::shouldEnablePolly())
336     return;
337 
338   if (PassPosition != POSITION_BEFORE_VECTORIZER)
339     return;
340 
341   PM.add(polly::createCodePreparationPass());
342   polly::registerPollyPasses(PM);
343   PM.add(createCodegenCleanupPass());
344 }
345 
/// Register Polly to be available as an optimizer.
///
/// We can currently run Polly at three different points in the pass manager:
/// a) very early, b) after the canonicalizing loop transformations and c) right
/// before the vectorizer.
///
/// The default is currently a), to register Polly such that it runs as early as
/// possible. This has several implications:
///
///   1) We need to schedule more canonicalization passes
///
///   As nothing is run before Polly, it is necessary to run a set of preparing
///   transformations before Polly to canonicalize the LLVM-IR and to allow
///   Polly to detect and understand the code.
///
///   2) LICM and LoopIdiom pass have not yet been run
///
///   Loop invariant code motion as well as the loop idiom recognition pass make
///   it more difficult for Polly to transform code. LICM may introduce
///   additional data dependences that are hard to eliminate and the loop idiom
///   recognition pass may introduce calls to memset that we currently do not
///   understand. By running Polly early enough (meaning before these passes) we
///   avoid difficulties that may be introduced by these passes.
///
///   3) We get the full -O3 optimization sequence after Polly
///
///   The LLVM-IR that is generated by Polly has been optimized on a high level,
///   but it may be rather inefficient on the lower/scalar level. By scheduling
///   Polly before all other passes, we have the full sequence of -O3
///   optimizations behind us, such that inefficiencies on the low level can
///   be optimized away.
///
/// We are currently evaluating the benefit of running Polly at position b) or
/// c). b) is likely too early as it interacts with the inliner. c) is nice
/// as everything is fully inlined and canonicalized, but we need to be able
/// to handle LICMed code to make it useful.
///
/// This object hooks registerPollyEarlyAsPossiblePasses into the
/// EP_ModuleOptimizerEarly extension point (position a).
static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
    llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
    registerPollyEarlyAsPossiblePasses);
386 
/// Hooks registerPollyLoopOptimizerEndPasses into the EP_LoopOptimizerEnd
/// extension point; the callback only adds passes when `-polly-position` is
/// "after-loopopt".
static llvm::RegisterStandardPasses
    RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
                                  registerPollyLoopOptimizerEndPasses);
390 
/// Hooks registerPollyScalarOptimizerLatePasses into the EP_VectorizerStart
/// extension point; the callback only adds passes when `-polly-position` is
/// "before-vectorizer".
static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
    llvm::PassManagerBuilder::EP_VectorizerStart,
    registerPollyScalarOptimizerLatePasses);
394 } // namespace polly
395