1 //===- Schedule.cpp - Calculate an optimized schedule ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass generates an entirey new schedule tree from the data dependences
11 // and iteration domains. The new schedule tree is computed in two steps:
12 //
13 // 1) The isl scheduling optimizer is run
14 //
15 // The isl scheduling optimizer creates a new schedule tree that maximizes
16 // parallelism and tileability and minimizes data-dependence distances. The
17 // algorithm used is a modified version of the ``Pluto'' algorithm:
18 //
19 //   U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan.
20 //   A Practical Automatic Polyhedral Parallelizer and Locality Optimizer.
21 //   In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language
22 //   Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008.
23 //
24 // 2) A set of post-scheduling transformations is applied on the schedule tree.
25 //
26 // These optimizations include:
27 //
28 //  - Tiling of the innermost tilable bands
29 //  - Prevectorization - The coice of a possible outer loop that is strip-mined
30 //                       to the innermost level to enable inner-loop
31 //                       vectorization.
32 //  - Some optimizations for spatial locality are also planned.
33 //
34 // For a detailed description of the schedule tree itself please see section 6
35 // of:
36 //
37 // Polyhedral AST generation is more than scanning polyhedra
38 // Tobias Grosser, Sven Verdoolaege, Albert Cohen
39 // ACM Transations on Programming Languages and Systems (TOPLAS),
40 // 37(4), July 2015
41 // http://www.grosser.es/#pub-polyhedral-AST-generation
42 //
43 // This publication also contains a detailed discussion of the different options
44 // for polyhedral loop unrolling, full/partial tile separation and other uses
45 // of the schedule tree.
46 //
47 //===----------------------------------------------------------------------===//
48 
49 #include "polly/ScheduleOptimizer.h"
50 #include "polly/CodeGen/CodeGeneration.h"
51 #include "polly/DependenceInfo.h"
52 #include "polly/LinkAllPasses.h"
53 #include "polly/Options.h"
54 #include "polly/ScopInfo.h"
55 #include "polly/Support/GICHelper.h"
56 #include "llvm/Support/Debug.h"
57 #include "isl/aff.h"
58 #include "isl/band.h"
59 #include "isl/constraint.h"
60 #include "isl/map.h"
61 #include "isl/options.h"
62 #include "isl/printer.h"
63 #include "isl/schedule.h"
64 #include "isl/schedule_node.h"
65 #include "isl/space.h"
66 #include "isl/union_map.h"
67 #include "isl/union_set.h"
68 #include <ciso646>
69 
70 using namespace llvm;
71 using namespace polly;
72 
73 #define DEBUG_TYPE "polly-opt-isl"
74 
75 static cl::opt<std::string>
76     OptimizeDeps("polly-opt-optimize-only",
77                  cl::desc("Only a certain kind of dependences (all/raw)"),
78                  cl::Hidden, cl::init("all"), cl::ZeroOrMore,
79                  cl::cat(PollyCategory));
80 
81 static cl::opt<std::string>
82     SimplifyDeps("polly-opt-simplify-deps",
83                  cl::desc("Dependences should be simplified (yes/no)"),
84                  cl::Hidden, cl::init("yes"), cl::ZeroOrMore,
85                  cl::cat(PollyCategory));
86 
87 static cl::opt<int> MaxConstantTerm(
88     "polly-opt-max-constant-term",
89     cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
90     cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
91 
92 static cl::opt<int> MaxCoefficient(
93     "polly-opt-max-coefficient",
94     cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
95     cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
96 
97 static cl::opt<std::string> FusionStrategy(
98     "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"),
99     cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory));
100 
101 static cl::opt<std::string>
102     MaximizeBandDepth("polly-opt-maximize-bands",
103                       cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
104                       cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
105 
106 static cl::opt<std::string> OuterCoincidence(
107     "polly-opt-outer-coincidence",
108     cl::desc("Try to construct schedules where the outer member of each band "
109              "satisfies the coincidence constraints (yes/no)"),
110     cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory));
111 
112 static cl::opt<int> PrevectorWidth(
113     "polly-prevect-width",
114     cl::desc(
115         "The number of loop iterations to strip-mine for pre-vectorization"),
116     cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
117 
118 static cl::opt<bool> FirstLevelTiling("polly-tiling",
119                                       cl::desc("Enable loop tiling"),
120                                       cl::init(true), cl::ZeroOrMore,
121                                       cl::cat(PollyCategory));
122 
123 static cl::opt<int> FirstLevelDefaultTileSize(
124     "polly-default-tile-size",
125     cl::desc("The default tile size (if not enough were provided by"
126              " --polly-tile-sizes)"),
127     cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory));
128 
129 static cl::list<int> FirstLevelTileSizes(
130     "polly-tile-sizes", cl::desc("A tile size for each loop dimension, filled "
131                                  "with --polly-default-tile-size"),
132     cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory));
133 
134 static cl::opt<bool>
135     SecondLevelTiling("polly-2nd-level-tiling",
136                       cl::desc("Enable a 2nd level loop of loop tiling"),
137                       cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
138 
139 static cl::opt<int> SecondLevelDefaultTileSize(
140     "polly-2nd-level-default-tile-size",
141     cl::desc("The default 2nd-level tile size (if not enough were provided by"
142              " --polly-2nd-level-tile-sizes)"),
143     cl::Hidden, cl::init(16), cl::ZeroOrMore, cl::cat(PollyCategory));
144 
145 static cl::list<int>
146     SecondLevelTileSizes("polly-2nd-level-tile-sizes",
147                          cl::desc("A tile size for each loop dimension, filled "
148                                   "with --polly-default-tile-size"),
149                          cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
150                          cl::cat(PollyCategory));
151 
152 static cl::opt<bool> RegisterTiling("polly-register-tiling",
153                                     cl::desc("Enable register tiling"),
154                                     cl::init(false), cl::ZeroOrMore,
155                                     cl::cat(PollyCategory));
156 
157 static cl::opt<int> RegisterDefaultTileSize(
158     "polly-register-tiling-default-tile-size",
159     cl::desc("The default register tile size (if not enough were provided by"
160              " --polly-register-tile-sizes)"),
161     cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
162 
163 static cl::list<int>
164     RegisterTileSizes("polly-register-tile-sizes",
165                       cl::desc("A tile size for each loop dimension, filled "
166                                "with --polly-register-tile-size"),
167                       cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
168                       cl::cat(PollyCategory));
169 
170 static cl::opt<bool>
171     PMBasedOpts("polly-pattern-matching-based-opts",
172                 cl::desc("Perform optimizations based on pattern matching"),
173                 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
174 
175 /// @brief Create an isl_union_set, which describes the isolate option based
176 ///        on IsoalteDomain.
177 ///
178 /// @param IsolateDomain An isl_set whose last dimension is the only one that
179 ///                      should belong to the current band node.
180 static __isl_give isl_union_set *
181 getIsolateOptions(__isl_take isl_set *IsolateDomain) {
182   auto Dims = isl_set_dim(IsolateDomain, isl_dim_set);
183   auto *IsolateRelation = isl_map_from_domain(IsolateDomain);
184   IsolateRelation = isl_map_move_dims(IsolateRelation, isl_dim_out, 0,
185                                       isl_dim_in, Dims - 1, 1);
186   auto *IsolateOption = isl_map_wrap(IsolateRelation);
187   auto *Id = isl_id_alloc(isl_set_get_ctx(IsolateOption), "isolate", NULL);
188   return isl_union_set_from_set(isl_set_set_tuple_id(IsolateOption, Id));
189 }
190 
191 /// @brief Create an isl_union_set, which describes the atomic option for the
192 ///        dimension of the current node.
193 ///
194 /// It may help to reduce the size of generated code.
195 ///
196 /// @param Ctx An isl_ctx, which is used to create the isl_union_set.
197 static __isl_give isl_union_set *getAtomicOptions(__isl_take isl_ctx *Ctx) {
198   auto *Space = isl_space_set_alloc(Ctx, 0, 1);
199   auto *AtomicOption = isl_set_universe(Space);
200   auto *Id = isl_id_alloc(Ctx, "atomic", NULL);
201   return isl_union_set_from_set(isl_set_set_tuple_id(AtomicOption, Id));
202 }
203 
204 /// @brief Make the last dimension of Set to take values
205 ///        from 0 to VectorWidth - 1.
206 ///
207 /// @param Set         A set, which should be modified.
208 /// @param VectorWidth A parameter, which determines the constraint.
209 static __isl_give isl_set *addExtentConstraints(__isl_take isl_set *Set,
210                                                 int VectorWidth) {
211   auto Dims = isl_set_dim(Set, isl_dim_set);
212   auto Space = isl_set_get_space(Set);
213   auto *LocalSpace = isl_local_space_from_space(Space);
214   auto *ExtConstr =
215       isl_constraint_alloc_inequality(isl_local_space_copy(LocalSpace));
216   ExtConstr = isl_constraint_set_constant_si(ExtConstr, 0);
217   ExtConstr =
218       isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, 1);
219   Set = isl_set_add_constraint(Set, ExtConstr);
220   ExtConstr = isl_constraint_alloc_inequality(LocalSpace);
221   ExtConstr = isl_constraint_set_constant_si(ExtConstr, VectorWidth - 1);
222   ExtConstr =
223       isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, -1);
224   return isl_set_add_constraint(Set, ExtConstr);
225 }
226 
227 /// @brief Build the desired set of partial tile prefixes.
228 ///
229 /// We build a set of partial tile prefixes, which are prefixes of the vector
230 /// loop that have exactly VectorWidth iterations.
231 ///
232 /// 1. Get all prefixes of the vector loop.
233 /// 2. Extend it to a set, which has exactly VectorWidth iterations for
234 ///    any prefix from the set that was built on the previous step.
235 /// 3. Subtract loop domain from it, project out the vector loop dimension and
236 ///    get a set of prefixes, which don't have exactly VectorWidth iterations.
237 /// 4. Subtract it from all prefixes of the vector loop and get the desired
238 ///    set.
239 ///
240 /// @param ScheduleRange A range of a map, which describes a prefix schedule
241 ///                      relation.
242 static __isl_give isl_set *
243 getPartialTilePrefixes(__isl_take isl_set *ScheduleRange, int VectorWidth) {
244   auto Dims = isl_set_dim(ScheduleRange, isl_dim_set);
245   auto *LoopPrefixes = isl_set_project_out(isl_set_copy(ScheduleRange),
246                                            isl_dim_set, Dims - 1, 1);
247   auto *ExtentPrefixes =
248       isl_set_add_dims(isl_set_copy(LoopPrefixes), isl_dim_set, 1);
249   ExtentPrefixes = addExtentConstraints(ExtentPrefixes, VectorWidth);
250   auto *BadPrefixes = isl_set_subtract(ExtentPrefixes, ScheduleRange);
251   BadPrefixes = isl_set_project_out(BadPrefixes, isl_dim_set, Dims - 1, 1);
252   return isl_set_subtract(LoopPrefixes, BadPrefixes);
253 }
254 
255 __isl_give isl_schedule_node *ScheduleTreeOptimizer::isolateFullPartialTiles(
256     __isl_take isl_schedule_node *Node, int VectorWidth) {
257   assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
258   Node = isl_schedule_node_child(Node, 0);
259   Node = isl_schedule_node_child(Node, 0);
260   auto *SchedRelUMap = isl_schedule_node_get_prefix_schedule_relation(Node);
261   auto *ScheduleRelation = isl_map_from_union_map(SchedRelUMap);
262   auto *ScheduleRange = isl_map_range(ScheduleRelation);
263   auto *IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
264   auto *AtomicOption = getAtomicOptions(isl_set_get_ctx(IsolateDomain));
265   auto *IsolateOption = getIsolateOptions(IsolateDomain);
266   Node = isl_schedule_node_parent(Node);
267   Node = isl_schedule_node_parent(Node);
268   auto *Options = isl_union_set_union(IsolateOption, AtomicOption);
269   Node = isl_schedule_node_band_set_ast_build_options(Node, Options);
270   return Node;
271 }
272 
273 __isl_give isl_schedule_node *
274 ScheduleTreeOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node,
275                                         unsigned DimToVectorize,
276                                         int VectorWidth) {
277   assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band);
278 
279   auto Space = isl_schedule_node_band_get_space(Node);
280   auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set);
281   isl_space_free(Space);
282   assert(DimToVectorize < ScheduleDimensions);
283 
284   if (DimToVectorize > 0) {
285     Node = isl_schedule_node_band_split(Node, DimToVectorize);
286     Node = isl_schedule_node_child(Node, 0);
287   }
288   if (DimToVectorize < ScheduleDimensions - 1)
289     Node = isl_schedule_node_band_split(Node, 1);
290   Space = isl_schedule_node_band_get_space(Node);
291   auto Sizes = isl_multi_val_zero(Space);
292   auto Ctx = isl_schedule_node_get_ctx(Node);
293   Sizes =
294       isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth));
295   Node = isl_schedule_node_band_tile(Node, Sizes);
296   Node = isolateFullPartialTiles(Node, VectorWidth);
297   Node = isl_schedule_node_child(Node, 0);
298   // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
299   // we will have troubles to match it in the backend.
300   Node = isl_schedule_node_band_set_ast_build_options(
301       Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }"));
302   Node = isl_schedule_node_band_sink(Node);
303   Node = isl_schedule_node_child(Node, 0);
304   if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf)
305     Node = isl_schedule_node_parent(Node);
306   isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr);
307   Node = isl_schedule_node_insert_mark(Node, LoopMarker);
308   return Node;
309 }
310 
311 __isl_give isl_schedule_node *
312 ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node,
313                                 const char *Identifier, ArrayRef<int> TileSizes,
314                                 int DefaultTileSize) {
315   auto Ctx = isl_schedule_node_get_ctx(Node);
316   auto Space = isl_schedule_node_band_get_space(Node);
317   auto Dims = isl_space_dim(Space, isl_dim_set);
318   auto Sizes = isl_multi_val_zero(Space);
319   std::string IdentifierString(Identifier);
320   for (unsigned i = 0; i < Dims; i++) {
321     auto tileSize = i < TileSizes.size() ? TileSizes[i] : DefaultTileSize;
322     Sizes = isl_multi_val_set_val(Sizes, i, isl_val_int_from_si(Ctx, tileSize));
323   }
324   auto TileLoopMarkerStr = IdentifierString + " - Tiles";
325   isl_id *TileLoopMarker =
326       isl_id_alloc(Ctx, TileLoopMarkerStr.c_str(), nullptr);
327   Node = isl_schedule_node_insert_mark(Node, TileLoopMarker);
328   Node = isl_schedule_node_child(Node, 0);
329   Node = isl_schedule_node_band_tile(Node, Sizes);
330   Node = isl_schedule_node_child(Node, 0);
331   auto PointLoopMarkerStr = IdentifierString + " - Points";
332   isl_id *PointLoopMarker =
333       isl_id_alloc(Ctx, PointLoopMarkerStr.c_str(), nullptr);
334   Node = isl_schedule_node_insert_mark(Node, PointLoopMarker);
335   Node = isl_schedule_node_child(Node, 0);
336   return Node;
337 }
338 
339 bool ScheduleTreeOptimizer::isTileableBandNode(
340     __isl_keep isl_schedule_node *Node) {
341   if (isl_schedule_node_get_type(Node) != isl_schedule_node_band)
342     return false;
343 
344   if (isl_schedule_node_n_children(Node) != 1)
345     return false;
346 
347   if (!isl_schedule_node_band_get_permutable(Node))
348     return false;
349 
350   auto Space = isl_schedule_node_band_get_space(Node);
351   auto Dims = isl_space_dim(Space, isl_dim_set);
352   isl_space_free(Space);
353 
354   if (Dims <= 1)
355     return false;
356 
357   auto Child = isl_schedule_node_get_child(Node, 0);
358   auto Type = isl_schedule_node_get_type(Child);
359   isl_schedule_node_free(Child);
360 
361   if (Type != isl_schedule_node_leaf)
362     return false;
363 
364   return true;
365 }
366 
367 __isl_give isl_schedule_node *
368 ScheduleTreeOptimizer::standardBandOpts(__isl_take isl_schedule_node *Node,
369                                         void *User) {
370   if (FirstLevelTiling)
371     Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
372                     FirstLevelDefaultTileSize);
373 
374   if (SecondLevelTiling)
375     Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
376                     SecondLevelDefaultTileSize);
377 
378   if (RegisterTiling) {
379     auto *Ctx = isl_schedule_node_get_ctx(Node);
380     Node = tileNode(Node, "Register tiling", RegisterTileSizes,
381                     RegisterDefaultTileSize);
382     Node = isl_schedule_node_band_set_ast_build_options(
383         Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}"));
384   }
385 
386   if (PollyVectorizerChoice == VECTORIZER_NONE)
387     return Node;
388 
389   auto Space = isl_schedule_node_band_get_space(Node);
390   auto Dims = isl_space_dim(Space, isl_dim_set);
391   isl_space_free(Space);
392 
393   for (int i = Dims - 1; i >= 0; i--)
394     if (isl_schedule_node_band_member_get_coincident(Node, i)) {
395       Node = prevectSchedBand(Node, i, PrevectorWidth);
396       break;
397     }
398 
399   return Node;
400 }
401 
402 /// @brief Check whether output dimensions of the map rely on the specified
403 ///        input dimension.
404 ///
405 /// @param IslMap The isl map to be considered.
406 /// @param DimNum The number of an input dimension to be checked.
407 static bool isInputDimUsed(__isl_take isl_map *IslMap, unsigned DimNum) {
408   auto *CheckedAccessRelation =
409       isl_map_project_out(isl_map_copy(IslMap), isl_dim_in, DimNum, 1);
410   CheckedAccessRelation =
411       isl_map_insert_dims(CheckedAccessRelation, isl_dim_in, DimNum, 1);
412   auto *InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_in);
413   CheckedAccessRelation =
414       isl_map_set_tuple_id(CheckedAccessRelation, isl_dim_in, InputDimsId);
415   InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_out);
416   CheckedAccessRelation =
417       isl_map_set_tuple_id(CheckedAccessRelation, isl_dim_out, InputDimsId);
418   auto res = !isl_map_is_equal(CheckedAccessRelation, IslMap);
419   isl_map_free(CheckedAccessRelation);
420   isl_map_free(IslMap);
421   return res;
422 }
423 
424 /// @brief Check if the SCoP statement could probably be optimized with
425 ///        analytical modeling.
426 ///
427 /// containsMatrMult tries to determine whether the following conditions
428 /// are true:
429 /// 1. all memory accesses of the statement will have stride 0 or 1,
430 ///    if we interchange loops (switch the variable used in the inner
431 ///    loop to the outer loop).
432 /// 2. all memory accesses of the statement except from the last one, are
433 ///    read memory access and the last one is write memory access.
434 /// 3. all subscripts of the last memory access of the statement don't contain
435 ///    the variable used in the inner loop.
436 ///
437 /// @param PartialSchedule The PartialSchedule that contains a SCoP statement
438 ///        to check.
439 static bool containsMatrMult(__isl_keep isl_map *PartialSchedule) {
440   auto InputDimsId = isl_map_get_tuple_id(PartialSchedule, isl_dim_in);
441   auto *ScpStmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimsId));
442   isl_id_free(InputDimsId);
443   if (ScpStmt->size() <= 1)
444     return false;
445   auto MemA = ScpStmt->begin();
446   for (unsigned i = 0; i < ScpStmt->size() - 2 && MemA != ScpStmt->end();
447        i++, MemA++)
448     if (!(*MemA)->isRead() ||
449         ((*MemA)->isArrayKind() &&
450          !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) ||
451            (*MemA)->isStrideZero(isl_map_copy(PartialSchedule)))))
452       return false;
453   MemA++;
454   if (!(*MemA)->isWrite() || !(*MemA)->isArrayKind() ||
455       !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) ||
456         (*MemA)->isStrideZero(isl_map_copy(PartialSchedule))))
457     return false;
458   auto DimNum = isl_map_dim(PartialSchedule, isl_dim_in);
459   return !isInputDimUsed((*MemA)->getAccessRelation(), DimNum - 1);
460 }
461 
462 /// @brief Circular shift of output dimensions of the integer map.
463 ///
464 /// @param IslMap The isl map to be modified.
465 static __isl_give isl_map *circularShiftOutputDims(__isl_take isl_map *IslMap) {
466   auto DimNum = isl_map_dim(IslMap, isl_dim_out);
467   if (DimNum == 0)
468     return IslMap;
469   auto InputDimsId = isl_map_get_tuple_id(IslMap, isl_dim_in);
470   IslMap = isl_map_move_dims(IslMap, isl_dim_in, 0, isl_dim_out, DimNum - 1, 1);
471   IslMap = isl_map_move_dims(IslMap, isl_dim_out, 0, isl_dim_in, 0, 1);
472   return isl_map_set_tuple_id(IslMap, isl_dim_in, InputDimsId);
473 }
474 
475 bool ScheduleTreeOptimizer::isMatrMultPattern(
476     __isl_keep isl_schedule_node *Node) {
477   auto *PartialSchedule =
478       isl_schedule_node_band_get_partial_schedule_union_map(Node);
479   if (isl_union_map_n_map(PartialSchedule) != 1)
480     return false;
481   auto *NewPartialSchedule = isl_map_from_union_map(PartialSchedule);
482   auto DimNum = isl_map_dim(NewPartialSchedule, isl_dim_in);
483   if (DimNum != 3) {
484     isl_map_free(NewPartialSchedule);
485     return false;
486   }
487   NewPartialSchedule = circularShiftOutputDims(NewPartialSchedule);
488   if (containsMatrMult(NewPartialSchedule)) {
489     isl_map_free(NewPartialSchedule);
490     return true;
491   }
492   isl_map_free(NewPartialSchedule);
493   return false;
494 }
495 
496 __isl_give isl_schedule_node *
497 ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
498                                     void *User) {
499   if (!isTileableBandNode(Node))
500     return Node;
501 
502   if (PMBasedOpts && isMatrMultPattern(Node))
503     DEBUG(dbgs() << "The matrix multiplication pattern was detected\n");
504 
505   return standardBandOpts(Node, User);
506 }
507 
508 __isl_give isl_schedule *
509 ScheduleTreeOptimizer::optimizeSchedule(__isl_take isl_schedule *Schedule) {
510   isl_schedule_node *Root = isl_schedule_get_root(Schedule);
511   Root = optimizeScheduleNode(Root);
512   isl_schedule_free(Schedule);
513   auto S = isl_schedule_node_get_schedule(Root);
514   isl_schedule_node_free(Root);
515   return S;
516 }
517 
518 __isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeScheduleNode(
519     __isl_take isl_schedule_node *Node) {
520   Node = isl_schedule_node_map_descendant_bottom_up(Node, optimizeBand, NULL);
521   return Node;
522 }
523 
524 bool ScheduleTreeOptimizer::isProfitableSchedule(
525     Scop &S, __isl_keep isl_union_map *NewSchedule) {
526   // To understand if the schedule has been optimized we check if the schedule
527   // has changed at all.
528   // TODO: We can improve this by tracking if any necessarily beneficial
529   // transformations have been performed. This can e.g. be tiling, loop
530   // interchange, or ...) We can track this either at the place where the
531   // transformation has been performed or, in case of automatic ILP based
532   // optimizations, by comparing (yet to be defined) performance metrics
533   // before/after the scheduling optimizer
534   // (e.g., #stride-one accesses)
535   isl_union_map *OldSchedule = S.getSchedule();
536   bool changed = !isl_union_map_is_equal(OldSchedule, NewSchedule);
537   isl_union_map_free(OldSchedule);
538   return changed;
539 }
540 
541 namespace {
542 class IslScheduleOptimizer : public ScopPass {
543 public:
544   static char ID;
545   explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = nullptr; }
546 
547   ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); }
548 
549   /// @brief Optimize the schedule of the SCoP @p S.
550   bool runOnScop(Scop &S) override;
551 
552   /// @brief Print the new schedule for the SCoP @p S.
553   void printScop(raw_ostream &OS, Scop &S) const override;
554 
555   /// @brief Register all analyses and transformation required.
556   void getAnalysisUsage(AnalysisUsage &AU) const override;
557 
558   /// @brief Release the internal memory.
559   void releaseMemory() override {
560     isl_schedule_free(LastSchedule);
561     LastSchedule = nullptr;
562   }
563 
564 private:
565   isl_schedule *LastSchedule;
566 };
567 }
568 
569 char IslScheduleOptimizer::ID = 0;
570 
571 bool IslScheduleOptimizer::runOnScop(Scop &S) {
572 
573   // Skip empty SCoPs but still allow code generation as it will delete the
574   // loops present but not needed.
575   if (S.getSize() == 0) {
576     S.markAsOptimized();
577     return false;
578   }
579 
580   const Dependences &D =
581       getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
582 
583   if (!D.hasValidDependences())
584     return false;
585 
586   isl_schedule_free(LastSchedule);
587   LastSchedule = nullptr;
588 
589   // Build input data.
590   int ValidityKinds =
591       Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
592   int ProximityKinds;
593 
594   if (OptimizeDeps == "all")
595     ProximityKinds =
596         Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
597   else if (OptimizeDeps == "raw")
598     ProximityKinds = Dependences::TYPE_RAW;
599   else {
600     errs() << "Do not know how to optimize for '" << OptimizeDeps << "'"
601            << " Falling back to optimizing all dependences.\n";
602     ProximityKinds =
603         Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
604   }
605 
606   isl_union_set *Domain = S.getDomains();
607 
608   if (!Domain)
609     return false;
610 
611   isl_union_map *Validity = D.getDependences(ValidityKinds);
612   isl_union_map *Proximity = D.getDependences(ProximityKinds);
613 
614   // Simplify the dependences by removing the constraints introduced by the
615   // domains. This can speed up the scheduling time significantly, as large
616   // constant coefficients will be removed from the dependences. The
617   // introduction of some additional dependences reduces the possible
618   // transformations, but in most cases, such transformation do not seem to be
619   // interesting anyway. In some cases this option may stop the scheduler to
620   // find any schedule.
621   if (SimplifyDeps == "yes") {
622     Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain));
623     Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain));
624     Proximity =
625         isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain));
626     Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain));
627   } else if (SimplifyDeps != "no") {
628     errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
629               "or 'no'. Falling back to default: 'yes'\n";
630   }
631 
632   DEBUG(dbgs() << "\n\nCompute schedule from: ");
633   DEBUG(dbgs() << "Domain := " << stringFromIslObj(Domain) << ";\n");
634   DEBUG(dbgs() << "Proximity := " << stringFromIslObj(Proximity) << ";\n");
635   DEBUG(dbgs() << "Validity := " << stringFromIslObj(Validity) << ";\n");
636 
637   unsigned IslSerializeSCCs;
638 
639   if (FusionStrategy == "max") {
640     IslSerializeSCCs = 0;
641   } else if (FusionStrategy == "min") {
642     IslSerializeSCCs = 1;
643   } else {
644     errs() << "warning: Unknown fusion strategy. Falling back to maximal "
645               "fusion.\n";
646     IslSerializeSCCs = 0;
647   }
648 
649   int IslMaximizeBands;
650 
651   if (MaximizeBandDepth == "yes") {
652     IslMaximizeBands = 1;
653   } else if (MaximizeBandDepth == "no") {
654     IslMaximizeBands = 0;
655   } else {
656     errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'"
657               " or 'no'. Falling back to default: 'yes'\n";
658     IslMaximizeBands = 1;
659   }
660 
661   int IslOuterCoincidence;
662 
663   if (OuterCoincidence == "yes") {
664     IslOuterCoincidence = 1;
665   } else if (OuterCoincidence == "no") {
666     IslOuterCoincidence = 0;
667   } else {
668     errs() << "warning: Option -polly-opt-outer-coincidence should either be "
669               "'yes' or 'no'. Falling back to default: 'no'\n";
670     IslOuterCoincidence = 0;
671   }
672 
673   isl_options_set_schedule_outer_coincidence(S.getIslCtx(),
674                                              IslOuterCoincidence);
675   isl_options_set_schedule_serialize_sccs(S.getIslCtx(), IslSerializeSCCs);
676   isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands);
677   isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm);
678   isl_options_set_schedule_max_coefficient(S.getIslCtx(), MaxCoefficient);
679   isl_options_set_tile_scale_tile_loops(S.getIslCtx(), 0);
680 
681   isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE);
682 
683   isl_schedule_constraints *ScheduleConstraints;
684   ScheduleConstraints = isl_schedule_constraints_on_domain(Domain);
685   ScheduleConstraints =
686       isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity);
687   ScheduleConstraints = isl_schedule_constraints_set_validity(
688       ScheduleConstraints, isl_union_map_copy(Validity));
689   ScheduleConstraints =
690       isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity);
691   isl_schedule *Schedule;
692   Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints);
693   isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT);
694 
695   // In cases the scheduler is not able to optimize the code, we just do not
696   // touch the schedule.
697   if (!Schedule)
698     return false;
699 
700   DEBUG({
701     auto *P = isl_printer_to_str(S.getIslCtx());
702     P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
703     P = isl_printer_print_schedule(P, Schedule);
704     dbgs() << "NewScheduleTree: \n" << isl_printer_get_str(P) << "\n";
705     isl_printer_free(P);
706   });
707 
708   isl_schedule *NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule);
709   isl_union_map *NewScheduleMap = isl_schedule_get_map(NewSchedule);
710 
711   if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewScheduleMap)) {
712     isl_union_map_free(NewScheduleMap);
713     isl_schedule_free(NewSchedule);
714     return false;
715   }
716 
717   S.setScheduleTree(NewSchedule);
718   S.markAsOptimized();
719 
720   isl_union_map_free(NewScheduleMap);
721   return false;
722 }
723 
724 void IslScheduleOptimizer::printScop(raw_ostream &OS, Scop &) const {
725   isl_printer *p;
726   char *ScheduleStr;
727 
728   OS << "Calculated schedule:\n";
729 
730   if (!LastSchedule) {
731     OS << "n/a\n";
732     return;
733   }
734 
735   p = isl_printer_to_str(isl_schedule_get_ctx(LastSchedule));
736   p = isl_printer_print_schedule(p, LastSchedule);
737   ScheduleStr = isl_printer_get_str(p);
738   isl_printer_free(p);
739 
740   OS << ScheduleStr << "\n";
741 }
742 
743 void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const {
744   ScopPass::getAnalysisUsage(AU);
745   AU.addRequired<DependenceInfo>();
746 }
747 
748 Pass *polly::createIslScheduleOptimizerPass() {
749   return new IslScheduleOptimizer();
750 }
751 
752 INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl",
753                       "Polly - Optimize schedule of SCoP", false, false);
754 INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
755 INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
756 INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl",
757                     "Polly - Optimize schedule of SCoP", false, false)
758