1 //===- Schedule.cpp - Calculate an optimized schedule ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates an entirely new schedule tree from the data dependences
10 // and iteration domains. The new schedule tree is computed in two steps:
11 //
12 // 1) The isl scheduling optimizer is run
13 //
14 // The isl scheduling optimizer creates a new schedule tree that maximizes
15 // parallelism and tileability and minimizes data-dependence distances. The
16 // algorithm used is a modified version of the ``Pluto'' algorithm:
17 //
18 //   U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan.
19 //   A Practical Automatic Polyhedral Parallelizer and Locality Optimizer.
20 //   In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language
21 //   Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008.
22 //
23 // 2) A set of post-scheduling transformations is applied on the schedule tree.
24 //
25 // These optimizations include:
26 //
27 //  - Tiling of the innermost tilable bands
28 //  - Prevectorization - The choice of a possible outer loop that is strip-mined
29 //                       to the innermost level to enable inner-loop
30 //                       vectorization.
31 //  - Some optimizations for spatial locality are also planned.
32 //
33 // For a detailed description of the schedule tree itself please see section 6
34 // of:
35 //
36 // Polyhedral AST generation is more than scanning polyhedra
37 // Tobias Grosser, Sven Verdoolaege, Albert Cohen
38 // ACM Transactions on Programming Languages and Systems (TOPLAS),
39 // 37(4), July 2015
40 // http://www.grosser.es/#pub-polyhedral-AST-generation
41 //
42 // This publication also contains a detailed discussion of the different options
43 // for polyhedral loop unrolling, full/partial tile separation and other uses
44 // of the schedule tree.
45 //
46 //===----------------------------------------------------------------------===//
47 
48 #include "polly/ScheduleOptimizer.h"
49 #include "polly/CodeGen/CodeGeneration.h"
50 #include "polly/DependenceInfo.h"
51 #include "polly/ManualOptimizer.h"
52 #include "polly/MatmulOptimizer.h"
53 #include "polly/Options.h"
54 #include "polly/ScheduleTreeTransform.h"
55 #include "polly/Support/ISLOStream.h"
56 #include "llvm/ADT/Sequence.h"
57 #include "llvm/ADT/Statistic.h"
58 #include "llvm/InitializePasses.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "isl/options.h"
61 
62 using namespace llvm;
63 using namespace polly;
64 
65 namespace llvm {
66 class Loop;
67 class Module;
68 } // namespace llvm
69 
70 #define DEBUG_TYPE "polly-opt-isl"
71 
72 static cl::opt<std::string>
73     OptimizeDeps("polly-opt-optimize-only",
74                  cl::desc("Only a certain kind of dependences (all/raw)"),
75                  cl::Hidden, cl::init("all"), cl::ZeroOrMore,
76                  cl::cat(PollyCategory));
77 
78 static cl::opt<std::string>
79     SimplifyDeps("polly-opt-simplify-deps",
80                  cl::desc("Dependences should be simplified (yes/no)"),
81                  cl::Hidden, cl::init("yes"), cl::ZeroOrMore,
82                  cl::cat(PollyCategory));
83 
84 static cl::opt<int> MaxConstantTerm(
85     "polly-opt-max-constant-term",
86     cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden,
87     cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
88 
89 static cl::opt<int> MaxCoefficient(
90     "polly-opt-max-coefficient",
91     cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden,
92     cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory));
93 
94 static cl::opt<std::string> FusionStrategy(
95     "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"),
96     cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory));
97 
98 static cl::opt<std::string>
99     MaximizeBandDepth("polly-opt-maximize-bands",
100                       cl::desc("Maximize the band depth (yes/no)"), cl::Hidden,
101                       cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory));
102 
103 static cl::opt<std::string> OuterCoincidence(
104     "polly-opt-outer-coincidence",
105     cl::desc("Try to construct schedules where the outer member of each band "
106              "satisfies the coincidence constraints (yes/no)"),
107     cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory));
108 
109 static cl::opt<int> PrevectorWidth(
110     "polly-prevect-width",
111     cl::desc(
112         "The number of loop iterations to strip-mine for pre-vectorization"),
113     cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory));
114 
115 static cl::opt<bool> FirstLevelTiling("polly-tiling",
116                                       cl::desc("Enable loop tiling"),
117                                       cl::init(true), cl::ZeroOrMore,
118                                       cl::cat(PollyCategory));
119 
120 static cl::opt<int> FirstLevelDefaultTileSize(
121     "polly-default-tile-size",
122     cl::desc("The default tile size (if not enough were provided by"
123              " --polly-tile-sizes)"),
124     cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory));
125 
126 static cl::list<int>
127     FirstLevelTileSizes("polly-tile-sizes",
128                         cl::desc("A tile size for each loop dimension, filled "
129                                  "with --polly-default-tile-size"),
130                         cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
131                         cl::cat(PollyCategory));
132 
133 static cl::opt<bool>
134     SecondLevelTiling("polly-2nd-level-tiling",
135                       cl::desc("Enable a 2nd level loop of loop tiling"),
136                       cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
137 
138 static cl::opt<int> SecondLevelDefaultTileSize(
139     "polly-2nd-level-default-tile-size",
140     cl::desc("The default 2nd-level tile size (if not enough were provided by"
141              " --polly-2nd-level-tile-sizes)"),
142     cl::Hidden, cl::init(16), cl::ZeroOrMore, cl::cat(PollyCategory));
143 
144 static cl::list<int>
145     SecondLevelTileSizes("polly-2nd-level-tile-sizes",
146                          cl::desc("A tile size for each loop dimension, filled "
147                                   "with --polly-default-tile-size"),
148                          cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
149                          cl::cat(PollyCategory));
150 
151 static cl::opt<bool> RegisterTiling("polly-register-tiling",
152                                     cl::desc("Enable register tiling"),
153                                     cl::init(false), cl::ZeroOrMore,
154                                     cl::cat(PollyCategory));
155 
156 static cl::opt<int> RegisterDefaultTileSize(
157     "polly-register-tiling-default-tile-size",
158     cl::desc("The default register tile size (if not enough were provided by"
159              " --polly-register-tile-sizes)"),
160     cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory));
161 
162 static cl::list<int>
163     RegisterTileSizes("polly-register-tile-sizes",
164                       cl::desc("A tile size for each loop dimension, filled "
165                                "with --polly-register-tile-size"),
166                       cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated,
167                       cl::cat(PollyCategory));
168 
169 static cl::opt<bool> PragmaBasedOpts(
170     "polly-pragma-based-opts",
171     cl::desc("Apply user-directed transformation from metadata"),
172     cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
173 
174 static cl::opt<bool>
175     PMBasedOpts("polly-pattern-matching-based-opts",
176                 cl::desc("Perform optimizations based on pattern matching"),
177                 cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory));
178 
179 static cl::opt<bool> OptimizedScops(
180     "polly-optimized-scops",
181     cl::desc("Polly - Dump polyhedral description of Scops optimized with "
182              "the isl scheduling optimizer and the set of post-scheduling "
183              "transformations is applied on the schedule tree"),
184     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
185 
186 STATISTIC(ScopsProcessed, "Number of scops processed");
187 STATISTIC(ScopsRescheduled, "Number of scops rescheduled");
188 STATISTIC(ScopsOptimized, "Number of scops optimized");
189 
190 STATISTIC(NumAffineLoopsOptimized, "Number of affine loops optimized");
191 STATISTIC(NumBoxedLoopsOptimized, "Number of boxed loops optimized");
192 
193 #define THREE_STATISTICS(VARNAME, DESC)                                        \
194   static Statistic VARNAME[3] = {                                              \
195       {DEBUG_TYPE, #VARNAME "0", DESC " (original)"},                          \
196       {DEBUG_TYPE, #VARNAME "1", DESC " (after scheduler)"},                   \
197       {DEBUG_TYPE, #VARNAME "2", DESC " (after optimizer)"}}
198 
199 THREE_STATISTICS(NumBands, "Number of bands");
200 THREE_STATISTICS(NumBandMembers, "Number of band members");
201 THREE_STATISTICS(NumCoincident, "Number of coincident band members");
202 THREE_STATISTICS(NumPermutable, "Number of permutable bands");
203 THREE_STATISTICS(NumFilters, "Number of filter nodes");
204 THREE_STATISTICS(NumExtension, "Number of extension nodes");
205 
206 STATISTIC(FirstLevelTileOpts, "Number of first level tiling applied");
207 STATISTIC(SecondLevelTileOpts, "Number of second level tiling applied");
208 STATISTIC(RegisterTileOpts, "Number of register tiling applied");
209 STATISTIC(PrevectOpts, "Number of strip-mining for prevectorization applied");
210 STATISTIC(MatMulOpts,
211           "Number of matrix multiplication patterns detected and optimized");
212 
213 namespace {
214 /// Additional parameters of the schedule optimizer.
215 ///
216 /// Target Transform Info and the SCoP dependencies used by the schedule
217 /// optimizer.
218 struct OptimizerAdditionalInfoTy {
219   const llvm::TargetTransformInfo *TTI;
220   const Dependences *D;
221 };
222 
223 class ScheduleTreeOptimizer {
224 public:
225   /// Apply schedule tree transformations.
226   ///
227   /// This function takes an (possibly already optimized) schedule tree and
228   /// applies a set of additional optimizations on the schedule tree. The
229   /// transformations applied include:
230   ///
231   ///   - Tiling
232   ///   - Prevectorization
233   ///
234   /// @param Schedule The schedule object the transformations will be applied
235   ///                 to.
236   /// @param OAI      Target Transform Info and the SCoP dependencies.
237   /// @returns        The transformed schedule.
238   static isl::schedule
239   optimizeSchedule(isl::schedule Schedule,
240                    const OptimizerAdditionalInfoTy *OAI = nullptr);
241 
242   /// Apply schedule tree transformations.
243   ///
244   /// This function takes a node in an (possibly already optimized) schedule
245   /// tree and applies a set of additional optimizations on this schedule tree
246   /// node and its descendants. The transformations applied include:
247   ///
248   ///   - Tiling
249   ///   - Prevectorization
250   ///
251   /// @param Node The schedule object post-transformations will be applied to.
252   /// @param OAI  Target Transform Info and the SCoP dependencies.
253   /// @returns    The transformed schedule.
254   static isl::schedule_node
255   optimizeScheduleNode(isl::schedule_node Node,
256                        const OptimizerAdditionalInfoTy *OAI = nullptr);
257 
258   /// Decide if the @p NewSchedule is profitable for @p S.
259   ///
260   /// @param S           The SCoP we optimize.
261   /// @param NewSchedule The new schedule we computed.
262   ///
263   /// @return True, if we believe @p NewSchedule is an improvement for @p S.
264   static bool isProfitableSchedule(polly::Scop &S, isl::schedule NewSchedule);
265 
266   /// Isolate a set of partial tile prefixes.
267   ///
268   /// This set should ensure that it contains only partial tile prefixes that
269   /// have exactly VectorWidth iterations.
270   ///
271   /// @param Node A schedule node band, which is a parent of a band node,
272   ///             that contains a vector loop.
273   /// @return Modified isl_schedule_node.
274   static isl::schedule_node isolateFullPartialTiles(isl::schedule_node Node,
275                                                     int VectorWidth);
276 
277 private:
278   /// Check if this node is a band node we want to tile.
279   ///
280   /// We look for innermost band nodes where individual dimensions are marked as
281   /// permutable.
282   ///
283   /// @param Node The node to check.
284   static bool isTileableBandNode(isl::schedule_node Node);
285 
286   /// Pre-vectorizes one scheduling dimension of a schedule band.
287   ///
288   /// prevectSchedBand splits out the dimension DimToVectorize, tiles it and
289   /// sinks the resulting point loop.
290   ///
291   /// Example (DimToVectorize=0, VectorWidth=4):
292   ///
293   /// | Before transformation:
294   /// |
295   /// | A[i,j] -> [i,j]
296   /// |
297   /// | for (i = 0; i < 128; i++)
298   /// |    for (j = 0; j < 128; j++)
299   /// |      A(i,j);
300   ///
301   /// | After transformation:
302   /// |
303   /// | for (it = 0; it < 32; it+=1)
304   /// |    for (j = 0; j < 128; j++)
305   /// |      for (ip = 0; ip <= 3; ip++)
306   /// |        A(4 * it + ip,j);
307   ///
308   /// The goal of this transformation is to create a trivially vectorizable
309   /// loop.  This means a parallel loop at the innermost level that has a
310   /// constant number of iterations corresponding to the target vector width.
311   ///
312   /// This transformation creates a loop at the innermost level. The loop has
313   /// a constant number of iterations, if the number of loop iterations at
314   /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is
315   /// currently constant and not yet target specific. This function does not
316   /// reason about parallelism.
317   static isl::schedule_node prevectSchedBand(isl::schedule_node Node,
318                                              unsigned DimToVectorize,
319                                              int VectorWidth);
320 
321   /// Apply additional optimizations on the bands in the schedule tree.
322   ///
323   /// We are looking for an innermost band node and apply the following
324   /// transformations:
325   ///
326   ///  - Tile the band
327   ///      - if the band is tileable
328   ///      - if the band has more than one loop dimension
329   ///
330   ///  - Prevectorize the schedule of the band (or the point loop in case of
331   ///    tiling).
332   ///      - if vectorization is enabled
333   ///
334   /// @param Node The schedule node to (possibly) optimize.
335   /// @param User A pointer to forward some use information
336   ///        (currently unused).
337   static isl_schedule_node *optimizeBand(isl_schedule_node *Node, void *User);
338 
339   /// Apply additional optimizations on the bands in the schedule tree.
340   ///
341   /// We apply the following
342   /// transformations:
343   ///
344   ///  - Tile the band
345   ///  - Prevectorize the schedule of the band (or the point loop in case of
346   ///    tiling).
347   ///      - if vectorization is enabled
348   ///
349   /// @param Node The schedule node to (possibly) optimize.
350   /// @param User A pointer to forward some use information
351   ///        (currently unused).
352   static isl::schedule_node standardBandOpts(isl::schedule_node Node,
353                                              void *User);
354 };
355 
356 isl::schedule_node
357 ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node,
358                                                int VectorWidth) {
359   assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
360   Node = Node.child(0).child(0);
361   isl::union_map SchedRelUMap = Node.get_prefix_schedule_relation();
362   isl::union_set ScheduleRangeUSet = SchedRelUMap.range();
363   isl::set ScheduleRange{ScheduleRangeUSet};
364   isl::set IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
365   auto AtomicOption = getDimOptions(IsolateDomain.ctx(), "atomic");
366   isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, 1);
367   Node = Node.parent().parent();
368   isl::union_set Options = IsolateOption.unite(AtomicOption);
369   isl::schedule_node_band Result =
370       Node.as<isl::schedule_node_band>().set_ast_build_options(Options);
371   return Result;
372 }
373 
374 isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand(
375     isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) {
376   assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
377 
378   auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
379   isl_size ScheduleDimensions = Space.dim(isl::dim::set).release();
380   assert((isl_size)DimToVectorize < ScheduleDimensions);
381 
382   if (DimToVectorize > 0) {
383     Node = isl::manage(
384         isl_schedule_node_band_split(Node.release(), DimToVectorize));
385     Node = Node.child(0);
386   }
387   if ((isl_size)DimToVectorize < ScheduleDimensions - 1)
388     Node = isl::manage(isl_schedule_node_band_split(Node.release(), 1));
389   Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
390   auto Sizes = isl::multi_val::zero(Space);
391   Sizes = Sizes.set_val(0, isl::val(Node.ctx(), VectorWidth));
392   Node =
393       isl::manage(isl_schedule_node_band_tile(Node.release(), Sizes.release()));
394   Node = isolateFullPartialTiles(Node, VectorWidth);
395   Node = Node.child(0);
396   // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise,
397   // we will have troubles to match it in the backend.
398   isl::schedule_node_band NodeBand =
399       Node.as<isl::schedule_node_band>().set_ast_build_options(
400           isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }"));
401   Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release()));
402   Node = Node.child(0);
403   if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf)
404     Node = Node.parent();
405   auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr);
406   PrevectOpts++;
407   return Node.insert_mark(LoopMarker);
408 }
409 
410 static bool isSimpleInnermostBand(const isl::schedule_node &Node) {
411   assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band);
412   assert(isl_schedule_node_n_children(Node.get()) == 1);
413 
414   auto ChildType = isl_schedule_node_get_type(Node.child(0).get());
415 
416   if (ChildType == isl_schedule_node_leaf)
417     return true;
418 
419   if (ChildType != isl_schedule_node_sequence)
420     return false;
421 
422   auto Sequence = Node.child(0);
423 
424   for (int c = 0, nc = isl_schedule_node_n_children(Sequence.get()); c < nc;
425        ++c) {
426     auto Child = Sequence.child(c);
427     if (isl_schedule_node_get_type(Child.get()) != isl_schedule_node_filter)
428       return false;
429     if (isl_schedule_node_get_type(Child.child(0).get()) !=
430         isl_schedule_node_leaf)
431       return false;
432   }
433   return true;
434 }
435 
436 bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) {
437   if (isl_schedule_node_get_type(Node.get()) != isl_schedule_node_band)
438     return false;
439 
440   if (isl_schedule_node_n_children(Node.get()) != 1)
441     return false;
442 
443   if (!isl_schedule_node_band_get_permutable(Node.get()))
444     return false;
445 
446   auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
447   auto Dims = Space.dim(isl::dim::set).release();
448 
449   if (Dims <= 1)
450     return false;
451 
452   return isSimpleInnermostBand(Node);
453 }
454 
455 __isl_give isl::schedule_node
456 ScheduleTreeOptimizer::standardBandOpts(isl::schedule_node Node, void *User) {
457   if (FirstLevelTiling) {
458     Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes,
459                     FirstLevelDefaultTileSize);
460     FirstLevelTileOpts++;
461   }
462 
463   if (SecondLevelTiling) {
464     Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes,
465                     SecondLevelDefaultTileSize);
466     SecondLevelTileOpts++;
467   }
468 
469   if (RegisterTiling) {
470     Node =
471         applyRegisterTiling(Node, RegisterTileSizes, RegisterDefaultTileSize);
472     RegisterTileOpts++;
473   }
474 
475   if (PollyVectorizerChoice == VECTORIZER_NONE)
476     return Node;
477 
478   auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get()));
479   auto Dims = Space.dim(isl::dim::set).release();
480 
481   for (int i = Dims - 1; i >= 0; i--)
482     if (Node.as<isl::schedule_node_band>().member_get_coincident(i)) {
483       Node = prevectSchedBand(Node, i, PrevectorWidth);
484       break;
485     }
486 
487   return Node;
488 }
489 
490 __isl_give isl_schedule_node *
491 ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node,
492                                     void *User) {
493   if (!isTileableBandNode(isl::manage_copy(Node)))
494     return Node;
495 
496   const OptimizerAdditionalInfoTy *OAI =
497       static_cast<const OptimizerAdditionalInfoTy *>(User);
498 
499   if (PMBasedOpts && User) {
500     isl::schedule_node PatternOptimizedSchedule =
501         tryOptimizeMatMulPattern(isl::manage_copy(Node), OAI->TTI, OAI->D);
502     if (!PatternOptimizedSchedule.is_null()) {
503       MatMulOpts++;
504       isl_schedule_node_free(Node);
505       return PatternOptimizedSchedule.release();
506     }
507   }
508 
509   return standardBandOpts(isl::manage(Node), User).release();
510 }
511 
512 isl::schedule
513 ScheduleTreeOptimizer::optimizeSchedule(isl::schedule Schedule,
514                                         const OptimizerAdditionalInfoTy *OAI) {
515   auto Root = Schedule.get_root();
516   Root = optimizeScheduleNode(Root, OAI);
517   return Root.get_schedule();
518 }
519 
520 isl::schedule_node ScheduleTreeOptimizer::optimizeScheduleNode(
521     isl::schedule_node Node, const OptimizerAdditionalInfoTy *OAI) {
522   Node = isl::manage(isl_schedule_node_map_descendant_bottom_up(
523       Node.release(), optimizeBand,
524       const_cast<void *>(static_cast<const void *>(OAI))));
525   return Node;
526 }
527 
528 bool ScheduleTreeOptimizer::isProfitableSchedule(Scop &S,
529                                                  isl::schedule NewSchedule) {
530   // To understand if the schedule has been optimized we check if the schedule
531   // has changed at all.
532   // TODO: We can improve this by tracking if any necessarily beneficial
533   // transformations have been performed. This can e.g. be tiling, loop
534   // interchange, or ...) We can track this either at the place where the
535   // transformation has been performed or, in case of automatic ILP based
536   // optimizations, by comparing (yet to be defined) performance metrics
537   // before/after the scheduling optimizer
538   // (e.g., #stride-one accesses)
539   auto NewScheduleMap = NewSchedule.get_map();
540   auto OldSchedule = S.getSchedule();
541   assert(!OldSchedule.is_null() &&
542          "Only IslScheduleOptimizer can insert extension nodes "
543          "that make Scop::getSchedule() return nullptr.");
544   bool changed = !OldSchedule.is_equal(NewScheduleMap);
545   return changed;
546 }
547 
548 class IslScheduleOptimizerWrapperPass : public ScopPass {
549 public:
550   static char ID;
551 
552   explicit IslScheduleOptimizerWrapperPass() : ScopPass(ID) {}
553 
554   /// Optimize the schedule of the SCoP @p S.
555   bool runOnScop(Scop &S) override;
556 
557   /// Print the new schedule for the SCoP @p S.
558   void printScop(raw_ostream &OS, Scop &S) const override;
559 
560   /// Register all analyses and transformation required.
561   void getAnalysisUsage(AnalysisUsage &AU) const override;
562 
563   /// Release the internal memory.
564   void releaseMemory() override {
565     LastSchedule = {};
566     IslCtx.reset();
567   }
568 
569 private:
570   std::shared_ptr<isl_ctx> IslCtx;
571   isl::schedule LastSchedule;
572 };
573 
574 char IslScheduleOptimizerWrapperPass::ID = 0;
575 
576 #ifndef NDEBUG
577 static void printSchedule(llvm::raw_ostream &OS, const isl::schedule &Schedule,
578                           StringRef Desc) {
579   isl::ctx Ctx = Schedule.ctx();
580   isl_printer *P = isl_printer_to_str(Ctx.get());
581   P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
582   P = isl_printer_print_schedule(P, Schedule.get());
583   char *Str = isl_printer_get_str(P);
584   OS << Desc << ": \n" << Str << "\n";
585   free(Str);
586   isl_printer_free(P);
587 }
588 #endif
589 
590 /// Collect statistics for the schedule tree.
591 ///
592 /// @param Schedule The schedule tree to analyze. If not a schedule tree it is
593 /// ignored.
594 /// @param Version  The version of the schedule tree that is analyzed.
595 ///                 0 for the original schedule tree before any transformation.
596 ///                 1 for the schedule tree after isl's rescheduling.
597 ///                 2 for the schedule tree after optimizations are applied
598 ///                 (tiling, pattern matching)
599 static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
600   auto Root = Schedule.get_root();
601   if (Root.is_null())
602     return;
603 
604   isl_schedule_node_foreach_descendant_top_down(
605       Root.get(),
606       [](__isl_keep isl_schedule_node *nodeptr, void *user) -> isl_bool {
607         isl::schedule_node Node = isl::manage_copy(nodeptr);
608         int Version = *static_cast<int *>(user);
609 
610         switch (isl_schedule_node_get_type(Node.get())) {
611         case isl_schedule_node_band: {
612           NumBands[Version]++;
613           if (isl_schedule_node_band_get_permutable(Node.get()) ==
614               isl_bool_true)
615             NumPermutable[Version]++;
616 
617           int CountMembers = isl_schedule_node_band_n_member(Node.get());
618           NumBandMembers[Version] += CountMembers;
619           for (int i = 0; i < CountMembers; i += 1) {
620             if (Node.as<isl::schedule_node_band>().member_get_coincident(i))
621               NumCoincident[Version]++;
622           }
623           break;
624         }
625 
626         case isl_schedule_node_filter:
627           NumFilters[Version]++;
628           break;
629 
630         case isl_schedule_node_extension:
631           NumExtension[Version]++;
632           break;
633 
634         default:
635           break;
636         }
637 
638         return isl_bool_true;
639       },
640       &Version);
641 }
642 
643 static bool runIslScheduleOptimizer(
644     Scop &S,
645     function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
646     TargetTransformInfo *TTI, isl::schedule &LastSchedule) {
647   // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
648   if (S.isToBeSkipped())
649     return false;
650 
651   // Skip empty SCoPs but still allow code generation as it will delete the
652   // loops present but not needed.
653   if (S.getSize() == 0) {
654     S.markAsOptimized();
655     return false;
656   }
657 
658   ScopsProcessed++;
659 
660   // Schedule without optimizations.
661   isl::schedule Schedule = S.getScheduleTree();
662   walkScheduleTreeForStatistics(S.getScheduleTree(), 0);
663   LLVM_DEBUG(printSchedule(dbgs(), Schedule, "Original schedule tree"));
664 
665   bool HasUserTransformation = false;
666   if (PragmaBasedOpts) {
667     isl::schedule ManuallyTransformed =
668         applyManualTransformations(&S, Schedule);
669     if (ManuallyTransformed.is_null()) {
670       LLVM_DEBUG(dbgs() << "Error during manual optimization\n");
671       return false;
672     }
673 
674     if (ManuallyTransformed.get() != Schedule.get()) {
675       // User transformations have precedence over other transformations.
676       HasUserTransformation = true;
677       Schedule = std::move(ManuallyTransformed);
678       LLVM_DEBUG(
679           printSchedule(dbgs(), Schedule, "After manual transformations"));
680     }
681   }
682 
683   // Only continue if either manual transformations have been applied or we are
684   // allowed to apply heuristics.
685   // TODO: Detect disabled heuristics and no user-directed transformation
686   // metadata earlier in ScopDetection.
687   if (!HasUserTransformation && S.hasDisableHeuristicsHint()) {
688     LLVM_DEBUG(dbgs() << "Heuristic optimizations disabled by metadata\n");
689     return false;
690   }
691 
692   // Get dependency analysis.
693   const Dependences &D = GetDeps(Dependences::AL_Statement);
694   if (D.getSharedIslCtx() != S.getSharedIslCtx()) {
695     LLVM_DEBUG(dbgs() << "DependenceInfo for another SCoP/isl_ctx\n");
696     return false;
697   }
698   if (!D.hasValidDependences()) {
699     LLVM_DEBUG(dbgs() << "Dependency information not available\n");
700     return false;
701   }
702 
703   // Apply ISL's algorithm only if not overriden by the user. Note that
704   // post-rescheduling optimizations (tiling, pattern-based, prevectorization)
705   // rely on the coincidence/permutable annotations on schedule tree bands that
706   // are added by the rescheduling analyzer. Therefore, disabling the
707   // rescheduler implicitly also disables these optimizations.
708   if (HasUserTransformation) {
709     LLVM_DEBUG(
710         dbgs() << "Skipping rescheduling due to manual transformation\n");
711   } else {
712     // Build input data.
713     int ValidityKinds =
714         Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
715     int ProximityKinds;
716 
717     if (OptimizeDeps == "all")
718       ProximityKinds =
719           Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
720     else if (OptimizeDeps == "raw")
721       ProximityKinds = Dependences::TYPE_RAW;
722     else {
723       errs() << "Do not know how to optimize for '" << OptimizeDeps << "'"
724              << " Falling back to optimizing all dependences.\n";
725       ProximityKinds =
726           Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW;
727     }
728 
729     isl::union_set Domain = S.getDomains();
730 
731     if (Domain.is_null())
732       return false;
733 
734     isl::union_map Validity = D.getDependences(ValidityKinds);
735     isl::union_map Proximity = D.getDependences(ProximityKinds);
736 
737     // Simplify the dependences by removing the constraints introduced by the
738     // domains. This can speed up the scheduling time significantly, as large
739     // constant coefficients will be removed from the dependences. The
740     // introduction of some additional dependences reduces the possible
741     // transformations, but in most cases, such transformation do not seem to be
742     // interesting anyway. In some cases this option may stop the scheduler to
743     // find any schedule.
744     if (SimplifyDeps == "yes") {
745       Validity = Validity.gist_domain(Domain);
746       Validity = Validity.gist_range(Domain);
747       Proximity = Proximity.gist_domain(Domain);
748       Proximity = Proximity.gist_range(Domain);
749     } else if (SimplifyDeps != "no") {
750       errs()
751           << "warning: Option -polly-opt-simplify-deps should either be 'yes' "
752              "or 'no'. Falling back to default: 'yes'\n";
753     }
754 
755     LLVM_DEBUG(dbgs() << "\n\nCompute schedule from: ");
756     LLVM_DEBUG(dbgs() << "Domain := " << Domain << ";\n");
757     LLVM_DEBUG(dbgs() << "Proximity := " << Proximity << ";\n");
758     LLVM_DEBUG(dbgs() << "Validity := " << Validity << ";\n");
759 
760     unsigned IslSerializeSCCs;
761 
762     if (FusionStrategy == "max") {
763       IslSerializeSCCs = 0;
764     } else if (FusionStrategy == "min") {
765       IslSerializeSCCs = 1;
766     } else {
767       errs() << "warning: Unknown fusion strategy. Falling back to maximal "
768                 "fusion.\n";
769       IslSerializeSCCs = 0;
770     }
771 
772     int IslMaximizeBands;
773 
774     if (MaximizeBandDepth == "yes") {
775       IslMaximizeBands = 1;
776     } else if (MaximizeBandDepth == "no") {
777       IslMaximizeBands = 0;
778     } else {
779       errs()
780           << "warning: Option -polly-opt-maximize-bands should either be 'yes'"
781              " or 'no'. Falling back to default: 'yes'\n";
782       IslMaximizeBands = 1;
783     }
784 
785     int IslOuterCoincidence;
786 
787     if (OuterCoincidence == "yes") {
788       IslOuterCoincidence = 1;
789     } else if (OuterCoincidence == "no") {
790       IslOuterCoincidence = 0;
791     } else {
792       errs() << "warning: Option -polly-opt-outer-coincidence should either be "
793                 "'yes' or 'no'. Falling back to default: 'no'\n";
794       IslOuterCoincidence = 0;
795     }
796 
797     isl_ctx *Ctx = S.getIslCtx().get();
798 
799     isl_options_set_schedule_outer_coincidence(Ctx, IslOuterCoincidence);
800     isl_options_set_schedule_serialize_sccs(Ctx, IslSerializeSCCs);
801     isl_options_set_schedule_maximize_band_depth(Ctx, IslMaximizeBands);
802     isl_options_set_schedule_max_constant_term(Ctx, MaxConstantTerm);
803     isl_options_set_schedule_max_coefficient(Ctx, MaxCoefficient);
804     isl_options_set_tile_scale_tile_loops(Ctx, 0);
805 
806     auto OnErrorStatus = isl_options_get_on_error(Ctx);
807     isl_options_set_on_error(Ctx, ISL_ON_ERROR_CONTINUE);
808 
809     auto SC = isl::schedule_constraints::on_domain(Domain);
810     SC = SC.set_proximity(Proximity);
811     SC = SC.set_validity(Validity);
812     SC = SC.set_coincidence(Validity);
813     Schedule = SC.compute_schedule();
814     isl_options_set_on_error(Ctx, OnErrorStatus);
815 
816     ScopsRescheduled++;
817     LLVM_DEBUG(printSchedule(dbgs(), Schedule, "After rescheduling"));
818   }
819 
820   walkScheduleTreeForStatistics(Schedule, 1);
821 
822   // In cases the scheduler is not able to optimize the code, we just do not
823   // touch the schedule.
824   if (Schedule.is_null())
825     return false;
826 
827   // Apply post-rescheduling optimizations.
828   const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)};
829   Schedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
830   Schedule = hoistExtensionNodes(Schedule);
831   LLVM_DEBUG(printSchedule(dbgs(), Schedule, "After post-optimizations"));
832   walkScheduleTreeForStatistics(Schedule, 2);
833 
834   if (!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
835     return false;
836 
837   auto ScopStats = S.getStatistics();
838   ScopsOptimized++;
839   NumAffineLoopsOptimized += ScopStats.NumAffineLoops;
840   NumBoxedLoopsOptimized += ScopStats.NumBoxedLoops;
841   LastSchedule = Schedule;
842 
843   S.setScheduleTree(Schedule);
844   S.markAsOptimized();
845 
846   if (OptimizedScops)
847     errs() << S;
848 
849   return false;
850 }
851 
852 bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
853   releaseMemory();
854 
855   Function &F = S.getFunction();
856   IslCtx = S.getSharedIslCtx();
857 
858   auto getDependences =
859       [this](Dependences::AnalysisLevel) -> const Dependences & {
860     return getAnalysis<DependenceInfo>().getDependences(
861         Dependences::AL_Statement);
862   };
863   // auto &Deps  = getAnalysis<DependenceInfo>();
864   TargetTransformInfo *TTI =
865       &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
866   return runIslScheduleOptimizer(S, getDependences, TTI, LastSchedule);
867 }
868 
869 static void runScheduleOptimizerPrinter(raw_ostream &OS,
870                                         isl::schedule LastSchedule) {
871   isl_printer *p;
872   char *ScheduleStr;
873 
874   OS << "Calculated schedule:\n";
875 
876   if (LastSchedule.is_null()) {
877     OS << "n/a\n";
878     return;
879   }
880 
881   p = isl_printer_to_str(LastSchedule.ctx().get());
882   p = isl_printer_set_yaml_style(p, ISL_YAML_STYLE_BLOCK);
883   p = isl_printer_print_schedule(p, LastSchedule.get());
884   ScheduleStr = isl_printer_get_str(p);
885   isl_printer_free(p);
886 
887   OS << ScheduleStr << "\n";
888 
889   free(ScheduleStr);
890 }
891 
892 void IslScheduleOptimizerWrapperPass::printScop(raw_ostream &OS, Scop &) const {
893   runScheduleOptimizerPrinter(OS, LastSchedule);
894 }
895 
896 void IslScheduleOptimizerWrapperPass::getAnalysisUsage(
897     AnalysisUsage &AU) const {
898   ScopPass::getAnalysisUsage(AU);
899   AU.addRequired<DependenceInfo>();
900   AU.addRequired<TargetTransformInfoWrapperPass>();
901 
902   AU.addPreserved<DependenceInfo>();
903 }
904 
905 } // namespace
906 
907 Pass *polly::createIslScheduleOptimizerWrapperPass() {
908   return new IslScheduleOptimizerWrapperPass();
909 }
910 
911 INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
912                       "Polly - Optimize schedule of SCoP", false, false);
913 INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
914 INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
915 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass);
916 INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
917                     "Polly - Optimize schedule of SCoP", false, false)
918 
919 static llvm::PreservedAnalyses
920 runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM,
921                                 ScopStandardAnalysisResults &SAR, SPMUpdater &U,
922                                 raw_ostream *OS) {
923   DependenceAnalysis::Result &Deps = SAM.getResult<DependenceAnalysis>(S, SAR);
924   auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & {
925     return Deps.getDependences(Dependences::AL_Statement);
926   };
927   TargetTransformInfo *TTI = &SAR.TTI;
928   isl::schedule LastSchedule;
929   bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, LastSchedule);
930   if (OS) {
931     *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
932         << S.getName() << "' in function '" << S.getFunction().getName()
933         << "':\n";
934     runScheduleOptimizerPrinter(*OS, LastSchedule);
935   }
936 
937   if (!Modified)
938     return PreservedAnalyses::all();
939 
940   PreservedAnalyses PA;
941   PA.preserveSet<AllAnalysesOn<Module>>();
942   PA.preserveSet<AllAnalysesOn<Function>>();
943   PA.preserveSet<AllAnalysesOn<Loop>>();
944   return PA;
945 }
946 
947 llvm::PreservedAnalyses
948 IslScheduleOptimizerPass::run(Scop &S, ScopAnalysisManager &SAM,
949                               ScopStandardAnalysisResults &SAR, SPMUpdater &U) {
950   return runIslScheduleOptimizerUsingNPM(S, SAM, SAR, U, nullptr);
951 }
952 
953 llvm::PreservedAnalyses
954 IslScheduleOptimizerPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
955                                      ScopStandardAnalysisResults &SAR,
956                                      SPMUpdater &U) {
957   return runIslScheduleOptimizerUsingNPM(S, SAM, SAR, U, &OS);
958 }
959