1 //===------ RegisterPasses.cpp - Add the Polly Passes to default passes  --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file composes the individual LLVM-IR passes provided by Polly to a
11 // functional polyhedral optimizer. The polyhedral optimizer is automatically
12 // made available to LLVM based compilers by loading the Polly shared library
13 // into such a compiler.
14 //
15 // The Polly optimizer is made available by executing a static constructor that
16 // registers the individual Polly passes in the LLVM pass manager builder. The
17 // passes are registered such that the default behaviour of the compiler is not
18 // changed, but that the flag '-polly' provided at optimization level '-O3'
19 // enables additional polyhedral optimizations.
20 //===----------------------------------------------------------------------===//
21 
22 #include "polly/RegisterPasses.h"
23 #include "polly/Canonicalization.h"
24 #include "polly/CodeGen/CodeGeneration.h"
25 #include "polly/CodeGen/CodegenCleanup.h"
26 #include "polly/DependenceInfo.h"
27 #include "polly/FlattenSchedule.h"
28 #include "polly/LinkAllPasses.h"
29 #include "polly/Options.h"
30 #include "polly/PolyhedralInfo.h"
31 #include "polly/ScopDetection.h"
32 #include "polly/ScopInfo.h"
33 #include "llvm/Analysis/CFGPrinter.h"
34 #include "llvm/IR/LegacyPassManager.h"
35 #include "llvm/Transforms/IPO.h"
36 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
37 #include "llvm/Transforms/Scalar.h"
38 #include "llvm/Transforms/Vectorize.h"
39 
40 using namespace llvm;
41 using namespace polly;
42 
43 cl::OptionCategory PollyCategory("Polly Options",
44                                  "Configure the polly loop optimizer");
45 
46 static cl::opt<bool>
47     PollyEnabled("polly", cl::desc("Enable the polly optimizer (only at -O3)"),
48                  cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
49 
50 static cl::opt<bool> PollyDetectOnly(
51     "polly-only-scop-detection",
52     cl::desc("Only run scop detection, but no other optimizations"),
53     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
54 
/// Positions in the pass pipeline at which the Polly passes can be scheduled
/// (selected through '-polly-position').
enum PassPositionChoice {
  POSITION_EARLY = 0,            ///< Before everything.
  POSITION_AFTER_LOOPOPT = 1,    ///< After the loop optimizer.
  POSITION_BEFORE_VECTORIZER = 2 ///< Right before the vectorizer.
};
60 
/// Scheduling optimizers selectable through '-polly-optimizer'.
enum OptimizerChoice {
  OPTIMIZER_NONE = 0, ///< No optimizer.
  OPTIMIZER_ISL = 1   ///< The isl scheduling optimizer.
};
62 
63 static cl::opt<PassPositionChoice> PassPosition(
64     "polly-position", cl::desc("Where to run polly in the pass pipeline"),
65     cl::values(
66         clEnumValN(POSITION_EARLY, "early", "Before everything"),
67         clEnumValN(POSITION_AFTER_LOOPOPT, "after-loopopt",
68                    "After the loop optimizer (but within the inline cycle)"),
69         clEnumValN(POSITION_BEFORE_VECTORIZER, "before-vectorizer",
70                    "Right before the vectorizer")),
71     cl::Hidden, cl::init(POSITION_EARLY), cl::ZeroOrMore,
72     cl::cat(PollyCategory));
73 
74 static cl::opt<OptimizerChoice>
75     Optimizer("polly-optimizer", cl::desc("Select the scheduling optimizer"),
76               cl::values(clEnumValN(OPTIMIZER_NONE, "none", "No optimizer"),
77                          clEnumValN(OPTIMIZER_ISL, "isl",
78                                     "The isl scheduling optimizer")),
79               cl::Hidden, cl::init(OPTIMIZER_ISL), cl::ZeroOrMore,
80               cl::cat(PollyCategory));
81 
/// Amount of code generation selectable through '-polly-code-generation'.
enum CodeGenChoice {
  CODEGEN_FULL = 0, ///< AST and IR generation.
  CODEGEN_AST = 1,  ///< Only AST generation.
  CODEGEN_NONE = 2  ///< No code generation.
};
83 static cl::opt<CodeGenChoice> CodeGeneration(
84     "polly-code-generation", cl::desc("How much code-generation to perform"),
85     cl::values(clEnumValN(CODEGEN_FULL, "full", "AST and IR generation"),
86                clEnumValN(CODEGEN_AST, "ast", "Only AST generation"),
87                clEnumValN(CODEGEN_NONE, "none", "No code generation")),
88     cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
89 
/// Hardware targets selectable through '-polly-target'.
enum TargetChoice {
  TARGET_CPU = 0, ///< Generate CPU code.
  TARGET_GPU = 1  ///< Generate GPU code.
};
91 static cl::opt<TargetChoice>
92     Target("polly-target", cl::desc("The hardware to target"),
93            cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
94 #ifdef GPU_CODEGEN
95                           ,
96                       clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
97 #endif
98                           ),
99            cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
100 
101 VectorizerChoice polly::PollyVectorizerChoice;
102 static cl::opt<polly::VectorizerChoice, true> Vectorizer(
103     "polly-vectorizer", cl::desc("Select the vectorization strategy"),
104     cl::values(
105         clEnumValN(polly::VECTORIZER_NONE, "none", "No Vectorization"),
106         clEnumValN(polly::VECTORIZER_POLLY, "polly",
107                    "Polly internal vectorizer"),
108         clEnumValN(
109             polly::VECTORIZER_STRIPMINE, "stripmine",
110             "Strip-mine outer loops for the loop-vectorizer to trigger")),
111     cl::location(PollyVectorizerChoice), cl::init(polly::VECTORIZER_NONE),
112     cl::ZeroOrMore, cl::cat(PollyCategory));
113 
114 static cl::opt<bool> ImportJScop(
115     "polly-import",
116     cl::desc("Export the polyhedral description of the detected Scops"),
117     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
118 
119 static cl::opt<bool> ExportJScop(
120     "polly-export",
121     cl::desc("Export the polyhedral description of the detected Scops"),
122     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
123 
124 static cl::opt<bool> DeadCodeElim("polly-run-dce",
125                                   cl::desc("Run the dead code elimination"),
126                                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
127                                   cl::cat(PollyCategory));
128 
129 static cl::opt<bool> PollyViewer(
130     "polly-show",
131     cl::desc("Highlight the code regions that will be optimized in a "
132              "(CFG BBs and LLVM-IR instructions)"),
133     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
134 
135 static cl::opt<bool> PollyOnlyViewer(
136     "polly-show-only",
137     cl::desc("Highlight the code regions that will be optimized in "
138              "a (CFG only BBs)"),
139     cl::init(false), cl::cat(PollyCategory));
140 
141 static cl::opt<bool>
142     PollyPrinter("polly-dot", cl::desc("Enable the Polly DOT printer in -O3"),
143                  cl::Hidden, cl::value_desc("Run the Polly DOT printer at -O3"),
144                  cl::init(false), cl::cat(PollyCategory));
145 
146 static cl::opt<bool> PollyOnlyPrinter(
147     "polly-dot-only",
148     cl::desc("Enable the Polly DOT printer in -O3 (no BB content)"), cl::Hidden,
149     cl::value_desc("Run the Polly DOT printer at -O3 (no BB content"),
150     cl::init(false), cl::cat(PollyCategory));
151 
152 static cl::opt<bool>
153     CFGPrinter("polly-view-cfg",
154                cl::desc("Show the Polly CFG right after code generation"),
155                cl::Hidden, cl::init(false), cl::cat(PollyCategory));
156 
157 static cl::opt<bool>
158     EnablePolyhedralInfo("polly-enable-polyhedralinfo",
159                          cl::desc("Enable polyhedral interface of Polly"),
160                          cl::Hidden, cl::init(false), cl::cat(PollyCategory));
161 
162 namespace polly {
163 void initializePollyPasses(PassRegistry &Registry) {
164   initializeCodeGenerationPass(Registry);
165 
166 #ifdef GPU_CODEGEN
167   initializePPCGCodeGenerationPass(Registry);
168 #endif
169   initializeCodePreparationPass(Registry);
170   initializeDeadCodeElimPass(Registry);
171   initializeDependenceInfoPass(Registry);
172   initializeDependenceInfoWrapperPassPass(Registry);
173   initializeJSONExporterPass(Registry);
174   initializeJSONImporterPass(Registry);
175   initializeIslAstInfoPass(Registry);
176   initializeIslScheduleOptimizerPass(Registry);
177   initializePollyCanonicalizePass(Registry);
178   initializePolyhedralInfoPass(Registry);
179   initializeScopDetectionPass(Registry);
180   initializeScopInfoRegionPassPass(Registry);
181   initializeScopInfoWrapperPassPass(Registry);
182   initializeCodegenCleanupPass(Registry);
183   initializeFlattenSchedulePass(Registry);
184 }
185 
186 /// Register Polly passes such that they form a polyhedral optimizer.
187 ///
188 /// The individual Polly passes are registered in the pass manager such that
189 /// they form a full polyhedral optimizer. The flow of the optimizer starts with
190 /// a set of preparing transformations that canonicalize the LLVM-IR such that
191 /// the LLVM-IR is easier for us to understand and to optimizes. On the
192 /// canonicalized LLVM-IR we first run the ScopDetection pass, which detects
193 /// static control flow regions. Those regions are then translated by the
194 /// ScopInfo pass into a polyhedral representation. As a next step, a scheduling
195 /// optimizer is run on the polyhedral representation and finally the optimized
196 /// polyhedral representation is code generated back to LLVM-IR.
197 ///
198 /// Besides this core functionality, we optionally schedule passes that provide
199 /// a graphical view of the scops (Polly[Only]Viewer, Polly[Only]Printer), that
200 /// allow the export/import of the polyhedral representation
201 /// (JSCON[Exporter|Importer]) or that show the cfg after code generation.
202 ///
203 /// For certain parts of the Polly optimizer, several alternatives are provided:
204 ///
205 /// As scheduling optimizer we support the isl scheduling optimizer
206 /// (http://freecode.com/projects/isl).
207 /// It is also possible to run Polly with no optimizer. This mode is mainly
208 /// provided to analyze the run and compile time changes caused by the
209 /// scheduling optimizer.
210 ///
211 /// Polly supports the isl internal code generator.
212 void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
213   PM.add(polly::createScopDetectionPass());
214 
215   if (PollyDetectOnly)
216     return;
217 
218   if (PollyViewer)
219     PM.add(polly::createDOTViewerPass());
220   if (PollyOnlyViewer)
221     PM.add(polly::createDOTOnlyViewerPass());
222   if (PollyPrinter)
223     PM.add(polly::createDOTPrinterPass());
224   if (PollyOnlyPrinter)
225     PM.add(polly::createDOTOnlyPrinterPass());
226 
227   PM.add(polly::createScopInfoRegionPassPass());
228   if (EnablePolyhedralInfo)
229     PM.add(polly::createPolyhedralInfoPass());
230 
231   if (ImportJScop)
232     PM.add(polly::createJSONImporterPass());
233 
234   if (DeadCodeElim)
235     PM.add(polly::createDeadCodeElimPass());
236 
237   if (Target == TARGET_GPU) {
238     // GPU generation provides its own scheduling optimization strategy.
239   } else {
240     switch (Optimizer) {
241     case OPTIMIZER_NONE:
242       break; /* Do nothing */
243 
244     case OPTIMIZER_ISL:
245       PM.add(polly::createIslScheduleOptimizerPass());
246       break;
247     }
248   }
249 
250   if (ExportJScop)
251     PM.add(polly::createJSONExporterPass());
252 
253   if (Target == TARGET_GPU) {
254 #ifdef GPU_CODEGEN
255     PM.add(polly::createPPCGCodeGenerationPass());
256 #endif
257   } else {
258     switch (CodeGeneration) {
259     case CODEGEN_AST:
260       PM.add(polly::createIslAstInfoPass());
261       break;
262     case CODEGEN_FULL:
263       PM.add(polly::createCodeGenerationPass());
264       break;
265     case CODEGEN_NONE:
266       break;
267     }
268   }
269 
270   // FIXME: This dummy ModulePass keeps some programs from miscompiling,
271   // probably some not correctly preserved analyses. It acts as a barrier to
272   // force all analysis results to be recomputed.
273   PM.add(createBarrierNoopPass());
274 
275   if (CFGPrinter)
276     PM.add(llvm::createCFGPrinterLegacyPassPass());
277 
278   if (Target == TARGET_GPU) {
279     // Invariant load hoisting not yet supported by GPU code generation.
280     PollyInvariantLoadHoisting = false;
281   }
282 }
283 
284 static bool shouldEnablePolly() {
285   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer)
286     PollyTrackFailures = true;
287 
288   if (PollyOnlyPrinter || PollyPrinter || PollyOnlyViewer || PollyViewer ||
289       ExportJScop || ImportJScop)
290     PollyEnabled = true;
291 
292   return PollyEnabled;
293 }
294 
295 static void
296 registerPollyEarlyAsPossiblePasses(const llvm::PassManagerBuilder &Builder,
297                                    llvm::legacy::PassManagerBase &PM) {
298   if (!polly::shouldEnablePolly())
299     return;
300 
301   if (PassPosition != POSITION_EARLY)
302     return;
303 
304   registerCanonicalicationPasses(PM);
305   polly::registerPollyPasses(PM);
306 }
307 
308 static void
309 registerPollyLoopOptimizerEndPasses(const llvm::PassManagerBuilder &Builder,
310                                     llvm::legacy::PassManagerBase &PM) {
311   if (!polly::shouldEnablePolly())
312     return;
313 
314   if (PassPosition != POSITION_AFTER_LOOPOPT)
315     return;
316 
317   PM.add(polly::createCodePreparationPass());
318   polly::registerPollyPasses(PM);
319   PM.add(createCodegenCleanupPass());
320 }
321 
322 static void
323 registerPollyScalarOptimizerLatePasses(const llvm::PassManagerBuilder &Builder,
324                                        llvm::legacy::PassManagerBase &PM) {
325   if (!polly::shouldEnablePolly())
326     return;
327 
328   if (PassPosition != POSITION_BEFORE_VECTORIZER)
329     return;
330 
331   PM.add(polly::createCodePreparationPass());
332   polly::registerPollyPasses(PM);
333   PM.add(createCodegenCleanupPass());
334 }
335 
336 /// Register Polly to be available as an optimizer
337 ///
338 ///
339 /// We can currently run Polly at three different points int the pass manager.
340 /// a) very early, b) after the canonicalizing loop transformations and c) right
341 /// before the vectorizer.
342 ///
343 /// The default is currently a), to register Polly such that it runs as early as
344 /// possible. This has several implications:
345 ///
346 ///   1) We need to schedule more canonicalization passes
347 ///
348 ///   As nothing is run before Polly, it is necessary to run a set of preparing
349 ///   transformations before Polly to canonicalize the LLVM-IR and to allow
350 ///   Polly to detect and understand the code.
351 ///
352 ///   2) LICM and LoopIdiom pass have not yet been run
353 ///
354 ///   Loop invariant code motion as well as the loop idiom recognition pass make
355 ///   it more difficult for Polly to transform code. LICM may introduce
356 ///   additional data dependences that are hard to eliminate and the loop idiom
357 ///   recognition pass may introduce calls to memset that we currently do not
358 ///   understand. By running Polly early enough (meaning before these passes) we
359 ///   avoid difficulties that may be introduced by these passes.
360 ///
361 ///   3) We get the full -O3 optimization sequence after Polly
362 ///
363 ///   The LLVM-IR that is generated by Polly has been optimized on a high level,
364 ///   but it may be rather inefficient on the lower/scalar level. By scheduling
365 ///   Polly before all other passes, we have the full sequence of -O3
366 ///   optimizations behind us, such that inefficiencies on the low level can
367 ///   be optimized away.
368 ///
369 /// We are currently evaluating the benefit or running Polly at position b) or
370 /// c). b) is likely to early as it interacts with the inliner. c) is nice
371 /// as everything is fully inlined and canonicalized, but we need to be able
372 /// to handle LICMed code to make it useful.
373 static llvm::RegisterStandardPasses RegisterPollyOptimizerEarly(
374     llvm::PassManagerBuilder::EP_ModuleOptimizerEarly,
375     registerPollyEarlyAsPossiblePasses);
376 
377 static llvm::RegisterStandardPasses
378     RegisterPollyOptimizerLoopEnd(llvm::PassManagerBuilder::EP_LoopOptimizerEnd,
379                                   registerPollyLoopOptimizerEndPasses);
380 
381 static llvm::RegisterStandardPasses RegisterPollyOptimizerScalarLate(
382     llvm::PassManagerBuilder::EP_VectorizerStart,
383     registerPollyScalarOptimizerLatePasses);
384 } // namespace polly
385