1 //===- IslAst.cpp - isl code generator interface --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// The isl code generator interface takes a Scop and generates an isl_ast. This
// isl_ast can either be returned directly or it can be pretty printed to
// stdout.
13 //
14 // A typical isl_ast output looks like this:
15 //
// for (c2 = max(0, ceild(n + m, 2)); c2 <= min(511, floord(5 * n, 3)); c2++) {
17 //   bb2(c2);
18 // }
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslAst.h"
24 #include "polly/DependenceInfo.h"
25 #include "polly/LinkAllPasses.h"
26 #include "polly/Options.h"
27 #include "polly/ScopInfo.h"
28 #include "polly/Support/GICHelper.h"
29 #include "llvm/Analysis/RegionInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include "isl/aff.h"
32 #include "isl/ast_build.h"
33 #include "isl/list.h"
34 #include "isl/map.h"
35 #include "isl/set.h"
36 #include "isl/union_map.h"
37 
38 #define DEBUG_TYPE "polly-ast"
39 
40 using namespace llvm;
41 using namespace polly;
42 
43 using IslAstUserPayload = IslAstInfo::IslAstUserPayload;
44 
// -polly-parallel: required for IslAstInfo::isExecutedInParallel() to ever
// report a loop as executed in parallel; also enables the parallelism test
// during AST construction.
static cl::opt<bool>
    PollyParallel("polly-parallel",
                  cl::desc("Generate thread parallel code (isl codegen only)"),
                  cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

// -polly-parallel-force: lets isExecutedInParallel() accept innermost loops,
// which it otherwise rejects.
static cl::opt<bool> PollyParallelForce(
    "polly-parallel-force",
    cl::desc(
        "Force generation of thread parallel code ignoring any cost model"),
    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));

// -polly-ast-use-context: build the AST from the SCoP's context instead of
// the universe set over its parameter space.
static cl::opt<bool> UseContext("polly-ast-use-context",
                                cl::desc("Use context"), cl::Hidden,
                                cl::init(false), cl::ZeroOrMore,
                                cl::cat(PollyCategory));

// -polly-ast-detect-parallel: enables the parallelism test during AST
// construction even when no parallel code generation was requested.
static cl::opt<bool> DetectParallel("polly-ast-detect-parallel",
                                    cl::desc("Detect parallelism"), cl::Hidden,
                                    cl::init(false), cl::ZeroOrMore,
                                    cl::cat(PollyCategory));

// -polly-no-early-exit: makes benefitsFromPolly() always answer true, so the
// AST is generated even if nothing interesting happened to the SCoP.
static cl::opt<bool> NoEarlyExit(
    "polly-no-early-exit",
    cl::desc("Do not exit early if no benefit of the Polly version was found."),
    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
70 
71 namespace polly {
72 class IslAst {
73 public:
74   IslAst(Scop *Scop, const Dependences &D);
75 
76   ~IslAst();
77 
78   /// Print a source code representation of the program.
79   void pprint(llvm::raw_ostream &OS);
80 
81   __isl_give isl_ast_node *getAst();
82 
83   /// @brief Get the run-time conditions for the Scop.
84   __isl_give isl_ast_expr *getRunCondition();
85 
86 private:
87   Scop *S;
88   isl_ast_node *Root;
89   isl_ast_expr *RunCondition;
90 
91   void buildRunCondition(__isl_keep isl_ast_build *Build);
92 };
93 } // End namespace polly.
94 
95 /// @brief Free an IslAstUserPayload object pointed to by @p Ptr
96 static void freeIslAstUserPayload(void *Ptr) {
97   delete ((IslAstInfo::IslAstUserPayload *)Ptr);
98 }
99 
/// @brief Release the isl objects referenced by this payload: the build
///        environment and the minimal dependence distance.
IslAstInfo::IslAstUserPayload::~IslAstUserPayload() {
  isl_ast_build_free(Build);
  isl_pw_aff_free(MinimalDependenceDistance);
}
104 
105 /// @brief Temporary information used when building the ast.
106 struct AstBuildUserInfo {
107   /// @brief Construct and initialize the helper struct for AST creation.
108   AstBuildUserInfo()
109       : Deps(nullptr), InParallelFor(false), LastForNodeId(nullptr) {}
110 
111   /// @brief The dependence information used for the parallelism check.
112   const Dependences *Deps;
113 
114   /// @brief Flag to indicate that we are inside a parallel for node.
115   bool InParallelFor;
116 
117   /// @brief The last iterator id created for the current SCoP.
118   isl_id *LastForNodeId;
119 };
120 
121 /// @brief Print a string @p str in a single line using @p Printer.
122 static isl_printer *printLine(__isl_take isl_printer *Printer,
123                               const std::string &str,
124                               __isl_keep isl_pw_aff *PWA = nullptr) {
125   Printer = isl_printer_start_line(Printer);
126   Printer = isl_printer_print_str(Printer, str.c_str());
127   if (PWA)
128     Printer = isl_printer_print_pw_aff(Printer, PWA);
129   return isl_printer_end_line(Printer);
130 }
131 
132 /// @brief Return all broken reductions as a string of clauses (OpenMP style).
133 static const std::string getBrokenReductionsStr(__isl_keep isl_ast_node *Node) {
134   IslAstInfo::MemoryAccessSet *BrokenReductions;
135   std::string str;
136 
137   BrokenReductions = IslAstInfo::getBrokenReductions(Node);
138   if (!BrokenReductions || BrokenReductions->empty())
139     return "";
140 
141   // Map each type of reduction to a comma separated list of the base addresses.
142   std::map<MemoryAccess::ReductionType, std::string> Clauses;
143   for (MemoryAccess *MA : *BrokenReductions)
144     if (MA->isWrite())
145       Clauses[MA->getReductionType()] +=
146           ", " + MA->getBaseAddr()->getName().str();
147 
148   // Now print the reductions sorted by type. Each type will cause a clause
149   // like:  reduction (+ : sum0, sum1, sum2)
150   for (const auto &ReductionClause : Clauses) {
151     str += " reduction (";
152     str += MemoryAccess::getReductionOperatorStr(ReductionClause.first);
153     // Remove the first two symbols (", ") to make the output look pretty.
154     str += " : " + ReductionClause.second.substr(2) + ")";
155   }
156 
157   return str;
158 }
159 
160 /// @brief Callback executed for each for node in the ast in order to print it.
161 static isl_printer *cbPrintFor(__isl_take isl_printer *Printer,
162                                __isl_take isl_ast_print_options *Options,
163                                __isl_keep isl_ast_node *Node, void *) {
164 
165   isl_pw_aff *DD = IslAstInfo::getMinimalDependenceDistance(Node);
166   const std::string BrokenReductionsStr = getBrokenReductionsStr(Node);
167   const std::string KnownParallelStr = "#pragma known-parallel";
168   const std::string DepDisPragmaStr = "#pragma minimal dependence distance: ";
169   const std::string SimdPragmaStr = "#pragma simd";
170   const std::string OmpPragmaStr = "#pragma omp parallel for";
171 
172   if (DD)
173     Printer = printLine(Printer, DepDisPragmaStr, DD);
174 
175   if (IslAstInfo::isInnermostParallel(Node))
176     Printer = printLine(Printer, SimdPragmaStr + BrokenReductionsStr);
177 
178   if (IslAstInfo::isExecutedInParallel(Node))
179     Printer = printLine(Printer, OmpPragmaStr);
180   else if (IslAstInfo::isOutermostParallel(Node))
181     Printer = printLine(Printer, KnownParallelStr + BrokenReductionsStr);
182 
183   isl_pw_aff_free(DD);
184   return isl_ast_node_for_print(Node, Printer, Options);
185 }
186 
187 /// @brief Check if the current scheduling dimension is parallel
188 ///
189 /// In case the dimension is parallel we also check if any reduction
190 /// dependences is broken when we exploit this parallelism. If so,
191 /// @p IsReductionParallel will be set to true. The reduction dependences we use
192 /// to check are actually the union of the transitive closure of the initial
193 /// reduction dependences together with their reveresal. Even though these
194 /// dependences connect all iterations with each other (thus they are cyclic)
195 /// we can perform the parallelism check as we are only interested in a zero
196 /// (or non-zero) dependence distance on the dimension in question.
197 static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build,
198                                      const Dependences *D,
199                                      IslAstUserPayload *NodeInfo) {
200   if (!D->hasValidDependences())
201     return false;
202 
203   isl_union_map *Schedule = isl_ast_build_get_schedule(Build);
204   isl_union_map *Deps = D->getDependences(
205       Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR);
206 
207   if (!D->isParallel(Schedule, Deps, &NodeInfo->MinimalDependenceDistance) &&
208       !isl_union_map_free(Schedule))
209     return false;
210 
211   isl_union_map *RedDeps = D->getDependences(Dependences::TYPE_TC_RED);
212   if (!D->isParallel(Schedule, RedDeps))
213     NodeInfo->IsReductionParallel = true;
214 
215   if (!NodeInfo->IsReductionParallel && !isl_union_map_free(Schedule))
216     return true;
217 
218   // Annotate reduction parallel nodes with the memory accesses which caused the
219   // reduction dependences parallel execution of the node conflicts with.
220   for (const auto &MaRedPair : D->getReductionDependences()) {
221     if (!MaRedPair.second)
222       continue;
223     RedDeps = isl_union_map_from_map(isl_map_copy(MaRedPair.second));
224     if (!D->isParallel(Schedule, RedDeps))
225       NodeInfo->BrokenReductions.insert(MaRedPair.first);
226   }
227 
228   isl_union_map_free(Schedule);
229   return true;
230 }
231 
232 // This method is executed before the construction of a for node. It creates
233 // an isl_id that is used to annotate the subsequently generated ast for nodes.
234 //
235 // In this function we also run the following analyses:
236 //
237 // - Detection of openmp parallel loops
238 //
239 static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build,
240                                             void *User) {
241   AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
242   IslAstUserPayload *Payload = new IslAstUserPayload();
243   isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", Payload);
244   Id = isl_id_set_free_user(Id, freeIslAstUserPayload);
245   BuildInfo->LastForNodeId = Id;
246 
247   // Test for parallelism only if we are not already inside a parallel loop
248   if (!BuildInfo->InParallelFor)
249     BuildInfo->InParallelFor = Payload->IsOutermostParallel =
250         astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
251 
252   return Id;
253 }
254 
255 // This method is executed after the construction of a for node.
256 //
257 // It performs the following actions:
258 //
259 // - Reset the 'InParallelFor' flag, as soon as we leave a for node,
260 //   that is marked as openmp parallel.
261 //
262 static __isl_give isl_ast_node *
263 astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build,
264                  void *User) {
265   isl_id *Id = isl_ast_node_get_annotation(Node);
266   assert(Id && "Post order visit assumes annotated for nodes");
267   IslAstUserPayload *Payload = (IslAstUserPayload *)isl_id_get_user(Id);
268   assert(Payload && "Post order visit assumes annotated for nodes");
269 
270   AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User;
271   assert(!Payload->Build && "Build environment already set");
272   Payload->Build = isl_ast_build_copy(Build);
273   Payload->IsInnermost = (Id == BuildInfo->LastForNodeId);
274 
275   // Innermost loops that are surrounded by parallel loops have not yet been
276   // tested for parallelism. Test them here to ensure we check all innermost
277   // loops for parallelism.
278   if (Payload->IsInnermost && BuildInfo->InParallelFor) {
279     if (Payload->IsOutermostParallel)
280       Payload->IsInnermostParallel = true;
281     else
282       Payload->IsInnermostParallel =
283           astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload);
284   }
285   if (Payload->IsOutermostParallel)
286     BuildInfo->InParallelFor = false;
287 
288   isl_id_free(Id);
289   return Node;
290 }
291 
292 static __isl_give isl_ast_node *AtEachDomain(__isl_take isl_ast_node *Node,
293                                              __isl_keep isl_ast_build *Build,
294                                              void *User) {
295   assert(!isl_ast_node_get_annotation(Node) && "Node already annotated");
296 
297   IslAstUserPayload *Payload = new IslAstUserPayload();
298   isl_id *Id = isl_id_alloc(isl_ast_build_get_ctx(Build), "", Payload);
299   Id = isl_id_set_free_user(Id, freeIslAstUserPayload);
300 
301   Payload->Build = isl_ast_build_copy(Build);
302 
303   return isl_ast_node_set_annotation(Node, Id);
304 }
305 
306 void IslAst::buildRunCondition(__isl_keep isl_ast_build *Build) {
307   // The conditions that need to be checked at run-time for this scop are
308   // available as an isl_set in the AssumedContext from which we can directly
309   // derive a run-time condition.
310   RunCondition = isl_ast_build_expr_from_set(Build, S->getAssumedContext());
311 
312   // Create the alias checks from the minimal/maximal accesses in each alias
313   // group. This operation is by construction quadratic in the number of
314   // elements in each alias group.
315   isl_ast_expr *NonAliasGroup, *MinExpr, *MaxExpr;
316   for (const Scop::MinMaxVectorTy *MinMaxAccesses : S->getAliasGroups()) {
317     auto AccEnd = MinMaxAccesses->end();
318     for (auto AccIt0 = MinMaxAccesses->begin(); AccIt0 != AccEnd; ++AccIt0) {
319       for (auto AccIt1 = AccIt0 + 1; AccIt1 != AccEnd; ++AccIt1) {
320         MinExpr =
321             isl_ast_expr_address_of(isl_ast_build_access_from_pw_multi_aff(
322                 Build, isl_pw_multi_aff_copy(AccIt0->first)));
323         MaxExpr =
324             isl_ast_expr_address_of(isl_ast_build_access_from_pw_multi_aff(
325                 Build, isl_pw_multi_aff_copy(AccIt1->second)));
326         NonAliasGroup = isl_ast_expr_le(MaxExpr, MinExpr);
327         MinExpr =
328             isl_ast_expr_address_of(isl_ast_build_access_from_pw_multi_aff(
329                 Build, isl_pw_multi_aff_copy(AccIt1->first)));
330         MaxExpr =
331             isl_ast_expr_address_of(isl_ast_build_access_from_pw_multi_aff(
332                 Build, isl_pw_multi_aff_copy(AccIt0->second)));
333         NonAliasGroup =
334             isl_ast_expr_or(NonAliasGroup, isl_ast_expr_le(MaxExpr, MinExpr));
335         RunCondition = isl_ast_expr_and(RunCondition, NonAliasGroup);
336       }
337     }
338   }
339 }
340 
341 /// @brief Simple cost analysis for a given SCoP
342 ///
343 /// TODO: Improve this analysis and extract it to make it usable in other
344 ///       places too.
345 ///       In order to improve the cost model we could either keep track of
346 ///       performed optimizations (e.g., tiling) or compute properties on the
347 ///       original as well as optimized SCoP (e.g., #stride-one-accesses).
348 static bool benefitsFromPolly(Scop *Scop, bool PerformParallelTest) {
349 
350   // First check the user choice.
351   if (NoEarlyExit)
352     return true;
353 
354   // Check if nothing interesting happened.
355   if (!PerformParallelTest && !Scop->isOptimized() &&
356       Scop->getAliasGroups().empty())
357     return false;
358 
359   // The default assumption is that Polly improves the code.
360   return true;
361 }
362 
363 IslAst::IslAst(Scop *Scop, const Dependences &D)
364     : S(Scop), Root(nullptr), RunCondition(nullptr) {
365 
366   bool PerformParallelTest = PollyParallel || DetectParallel ||
367                              PollyVectorizerChoice != VECTORIZER_NONE;
368 
369   // Skip AST and code generation if there was no benefit achieved.
370   if (!benefitsFromPolly(Scop, PerformParallelTest))
371     return;
372 
373   isl_ctx *Ctx = S->getIslCtx();
374   isl_options_set_ast_build_atomic_upper_bound(Ctx, true);
375   isl_ast_build *Build;
376   AstBuildUserInfo BuildInfo;
377 
378   if (UseContext)
379     Build = isl_ast_build_from_context(S->getContext());
380   else
381     Build = isl_ast_build_from_context(isl_set_universe(S->getParamSpace()));
382 
383   Build = isl_ast_build_set_at_each_domain(Build, AtEachDomain, nullptr);
384 
385   if (PerformParallelTest) {
386     BuildInfo.Deps = &D;
387     BuildInfo.InParallelFor = 0;
388 
389     Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor,
390                                               &BuildInfo);
391     Build =
392         isl_ast_build_set_after_each_for(Build, &astBuildAfterFor, &BuildInfo);
393   }
394 
395   buildRunCondition(Build);
396 
397   Root = isl_ast_build_node_from_schedule(Build, S->getScheduleTree());
398 
399   isl_ast_build_free(Build);
400 }
401 
/// @brief Release the AST root and the run-time condition owned by this
///        object. Both may be null if AST generation was skipped.
IslAst::~IslAst() {
  isl_ast_node_free(Root);
  isl_ast_expr_free(RunCondition);
}
406 
407 __isl_give isl_ast_node *IslAst::getAst() { return isl_ast_node_copy(Root); }
408 __isl_give isl_ast_expr *IslAst::getRunCondition() {
409   return isl_ast_expr_copy(RunCondition);
410 }
411 
412 void IslAstInfo::releaseMemory() {
413   if (Ast) {
414     delete Ast;
415     Ast = nullptr;
416   }
417 }
418 
419 bool IslAstInfo::runOnScop(Scop &Scop) {
420   if (Ast)
421     delete Ast;
422 
423   S = &Scop;
424 
425   const Dependences &D = getAnalysis<DependenceInfo>().getDependences();
426 
427   Ast = new IslAst(&Scop, D);
428 
429   DEBUG(printScop(dbgs(), Scop));
430   return false;
431 }
432 
433 __isl_give isl_ast_node *IslAstInfo::getAst() const { return Ast->getAst(); }
434 __isl_give isl_ast_expr *IslAstInfo::getRunCondition() const {
435   return Ast->getRunCondition();
436 }
437 
438 IslAstUserPayload *IslAstInfo::getNodePayload(__isl_keep isl_ast_node *Node) {
439   isl_id *Id = isl_ast_node_get_annotation(Node);
440   if (!Id)
441     return nullptr;
442   IslAstUserPayload *Payload = (IslAstUserPayload *)isl_id_get_user(Id);
443   isl_id_free(Id);
444   return Payload;
445 }
446 
447 bool IslAstInfo::isInnermost(__isl_keep isl_ast_node *Node) {
448   IslAstUserPayload *Payload = getNodePayload(Node);
449   return Payload && Payload->IsInnermost;
450 }
451 
452 bool IslAstInfo::isParallel(__isl_keep isl_ast_node *Node) {
453   return IslAstInfo::isInnermostParallel(Node) ||
454          IslAstInfo::isOutermostParallel(Node);
455 }
456 
457 bool IslAstInfo::isInnermostParallel(__isl_keep isl_ast_node *Node) {
458   IslAstUserPayload *Payload = getNodePayload(Node);
459   return Payload && Payload->IsInnermostParallel;
460 }
461 
462 bool IslAstInfo::isOutermostParallel(__isl_keep isl_ast_node *Node) {
463   IslAstUserPayload *Payload = getNodePayload(Node);
464   return Payload && Payload->IsOutermostParallel;
465 }
466 
467 bool IslAstInfo::isReductionParallel(__isl_keep isl_ast_node *Node) {
468   IslAstUserPayload *Payload = getNodePayload(Node);
469   return Payload && Payload->IsReductionParallel;
470 }
471 
472 bool IslAstInfo::isExecutedInParallel(__isl_keep isl_ast_node *Node) {
473 
474   if (!PollyParallel)
475     return false;
476 
477   // Do not parallelize innermost loops.
478   //
479   // Parallelizing innermost loops is often not profitable, especially if
480   // they have a low number of iterations.
481   //
482   // TODO: Decide this based on the number of loop iterations that will be
483   //       executed. This can possibly require run-time checks, which again
484   //       raises the question of both run-time check overhead and code size
485   //       costs.
486   if (!PollyParallelForce && isInnermost(Node))
487     return false;
488 
489   return isOutermostParallel(Node) && !isReductionParallel(Node);
490 }
491 
492 isl_union_map *IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) {
493   IslAstUserPayload *Payload = getNodePayload(Node);
494   return Payload ? isl_ast_build_get_schedule(Payload->Build) : nullptr;
495 }
496 
497 isl_pw_aff *
498 IslAstInfo::getMinimalDependenceDistance(__isl_keep isl_ast_node *Node) {
499   IslAstUserPayload *Payload = getNodePayload(Node);
500   return Payload ? isl_pw_aff_copy(Payload->MinimalDependenceDistance)
501                  : nullptr;
502 }
503 
504 IslAstInfo::MemoryAccessSet *
505 IslAstInfo::getBrokenReductions(__isl_keep isl_ast_node *Node) {
506   IslAstUserPayload *Payload = getNodePayload(Node);
507   return Payload ? &Payload->BrokenReductions : nullptr;
508 }
509 
510 isl_ast_build *IslAstInfo::getBuild(__isl_keep isl_ast_node *Node) {
511   IslAstUserPayload *Payload = getNodePayload(Node);
512   return Payload ? Payload->Build : nullptr;
513 }
514 
515 void IslAstInfo::printScop(raw_ostream &OS, Scop &S) const {
516   isl_ast_print_options *Options;
517   isl_ast_node *RootNode = getAst();
518   Function *F = S.getRegion().getEntry()->getParent();
519 
520   OS << ":: isl ast :: " << F->getName() << " :: " << S.getRegion().getNameStr()
521      << "\n";
522 
523   if (!RootNode) {
524     OS << ":: isl ast generation and code generation was skipped!\n\n";
525     return;
526   }
527 
528   isl_ast_expr *RunCondition = getRunCondition();
529   char *RtCStr, *AstStr;
530 
531   Options = isl_ast_print_options_alloc(S.getIslCtx());
532   Options = isl_ast_print_options_set_print_for(Options, cbPrintFor, nullptr);
533 
534   isl_printer *P = isl_printer_to_str(S.getIslCtx());
535   P = isl_printer_print_ast_expr(P, RunCondition);
536   RtCStr = isl_printer_get_str(P);
537   P = isl_printer_flush(P);
538   P = isl_printer_indent(P, 4);
539   P = isl_printer_set_output_format(P, ISL_FORMAT_C);
540   P = isl_ast_node_print(RootNode, P, Options);
541   AstStr = isl_printer_get_str(P);
542 
543   isl_union_map *Schedule =
544       isl_union_map_intersect_domain(S.getSchedule(), S.getDomains());
545 
546   DEBUG({
547     dbgs() << S.getContextStr() << "\n";
548     dbgs() << stringFromIslObj(Schedule);
549   });
550   OS << "\nif (" << RtCStr << ")\n\n";
551   OS << AstStr << "\n";
552   OS << "else\n";
553   OS << "    {  /* original code */ }\n\n";
554 
555   free(RtCStr);
556   free(AstStr);
557 
558   isl_ast_expr_free(RunCondition);
559   isl_union_map_free(Schedule);
560   isl_ast_node_free(RootNode);
561   isl_printer_free(P);
562 }
563 
/// @brief Declare the analyses this pass needs: everything a ScopPass
///        requires plus ScopInfo and DependenceInfo.
void IslAstInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  // Get the Common analysis usage of ScopPasses.
  ScopPass::getAnalysisUsage(AU);
  AU.addRequired<ScopInfo>();
  // runOnScop() queries DependenceInfo for the dependences of the SCoP.
  AU.addRequired<DependenceInfo>();
}
570 
// Definition of the static pass identifier member.
char IslAstInfo::ID = 0;
572 
573 Pass *polly::createIslAstInfoPass() { return new IslAstInfo(); }
574 
575 INITIALIZE_PASS_BEGIN(IslAstInfo, "polly-ast",
576                       "Polly - Generate an AST of the SCoP (isl)", false,
577                       false);
578 INITIALIZE_PASS_DEPENDENCY(ScopInfo);
579 INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
580 INITIALIZE_PASS_END(IslAstInfo, "polly-ast",
581                     "Polly - Generate an AST from the SCoP (isl)", false, false)
582