1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg
11 // GPU mapping strategy.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "polly/CodeGen/IslNodeBuilder.h"
16 #include "polly/CodeGen/Utils.h"
17 #include "polly/DependenceInfo.h"
18 #include "polly/LinkAllPasses.h"
19 #include "polly/Options.h"
20 #include "polly/ScopInfo.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/BasicAliasAnalysis.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/PostDominators.h"
25 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
26 
27 #include "isl/union_map.h"
28 
29 extern "C" {
30 #include "ppcg/cuda.h"
31 #include "ppcg/gpu.h"
32 #include "ppcg/gpu_print.h"
33 #include "ppcg/ppcg.h"
34 #include "ppcg/schedule.h"
35 }
36 
37 #include "llvm/Support/Debug.h"
38 
39 using namespace polly;
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "polly-codegen-ppcg"
43 
44 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule",
45                                   cl::desc("Dump the computed GPU Schedule"),
46                                   cl::Hidden, cl::init(false), cl::ZeroOrMore,
47                                   cl::cat(PollyCategory));
48 
49 static cl::opt<bool>
50     DumpCode("polly-acc-dump-code",
51              cl::desc("Dump C code describing the GPU mapping"), cl::Hidden,
52              cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
53 
54 /// Create the ast expressions for a ScopStmt.
55 ///
56 /// This function is a callback for to generate the ast expressions for each
57 /// of the scheduled ScopStmts.
58 static __isl_give isl_id_to_ast_expr *pollyBuildAstExprForStmt(
59     void *Stmt, isl_ast_build *Build,
60     isl_multi_pw_aff *(*FunctionIndex)(__isl_take isl_multi_pw_aff *MPA,
61                                        isl_id *Id, void *User),
62     void *UserIndex,
63     isl_ast_expr *(*FunctionExpr)(isl_ast_expr *Expr, isl_id *Id, void *User),
64     void *User_expr) {
65 
66   // TODO: Implement the AST expression generation. For now we just return a
67   // nullptr to ensure that we do not free uninitialized pointers.
68 
69   return nullptr;
70 }
71 
72 /// Generate code for a GPU specific isl AST.
73 ///
74 /// The GPUNodeBuilder augments the general existing IslNodeBuilder, which
75 /// generates code for general-prupose AST nodes, with special functionality
76 /// for generating GPU specific user nodes.
77 ///
78 /// @see GPUNodeBuilder::createUser
79 class GPUNodeBuilder : public IslNodeBuilder {
80 public:
81   GPUNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P,
82                  const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE,
83                  DominatorTree &DT, Scop &S)
84       : IslNodeBuilder(Builder, Annotator, P, DL, LI, SE, DT, S) {}
85 
86 private:
87   /// Create code for user-defined AST nodes.
88   ///
89   /// These AST nodes can be of type:
90   ///
91   ///   - ScopStmt:      A computational statement (TODO)
92   ///   - Kernel:        A GPU kernel call (TODO)
93   ///   - Data-Transfer: A GPU <-> CPU data-transfer (TODO)
94   ///
95   /// @param UserStmt The ast node to generate code for.
96   virtual void createUser(__isl_take isl_ast_node *UserStmt);
97 };
98 
99 void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
100   isl_ast_node_free(UserStmt);
101   return;
102 }
103 
104 namespace {
105 class PPCGCodeGeneration : public ScopPass {
106 public:
107   static char ID;
108 
109   /// The scop that is currently processed.
110   Scop *S;
111 
112   LoopInfo *LI;
113   DominatorTree *DT;
114   ScalarEvolution *SE;
115   const DataLayout *DL;
116   RegionInfo *RI;
117 
118   PPCGCodeGeneration() : ScopPass(ID) {}
119 
120   /// Construct compilation options for PPCG.
121   ///
122   /// @returns The compilation options.
123   ppcg_options *createPPCGOptions() {
124     auto DebugOptions =
125         (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options));
126     auto Options = (ppcg_options *)malloc(sizeof(ppcg_options));
127 
128     DebugOptions->dump_schedule_constraints = false;
129     DebugOptions->dump_schedule = false;
130     DebugOptions->dump_final_schedule = false;
131     DebugOptions->dump_sizes = false;
132 
133     Options->debug = DebugOptions;
134 
135     Options->reschedule = true;
136     Options->scale_tile_loops = false;
137     Options->wrap = false;
138 
139     Options->non_negative_parameters = false;
140     Options->ctx = nullptr;
141     Options->sizes = nullptr;
142 
143     Options->tile_size = 32;
144 
145     Options->use_private_memory = false;
146     Options->use_shared_memory = false;
147     Options->max_shared_memory = 0;
148 
149     Options->target = PPCG_TARGET_CUDA;
150     Options->openmp = false;
151     Options->linearize_device_arrays = true;
152     Options->live_range_reordering = false;
153 
154     Options->opencl_compiler_options = nullptr;
155     Options->opencl_use_gpu = false;
156     Options->opencl_n_include_file = 0;
157     Options->opencl_include_files = nullptr;
158     Options->opencl_print_kernel_types = false;
159     Options->opencl_embed_kernel_code = false;
160 
161     Options->save_schedule_file = nullptr;
162     Options->load_schedule_file = nullptr;
163 
164     return Options;
165   }
166 
167   /// Get a tagged access relation containing all accesses of type @p AccessTy.
168   ///
169   /// Instead of a normal access of the form:
170   ///
171   ///   Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)]
172   ///
173   /// a tagged access has the form
174   ///
175   ///   [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)]
176   ///
177   /// where 'id' is an additional space that references the memory access that
178   /// triggered the access.
179   ///
180   /// @param AccessTy The type of the memory accesses to collect.
181   ///
182   /// @return The relation describing all tagged memory accesses.
183   isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) {
184     isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace());
185 
186     for (auto &Stmt : *S)
187       for (auto &Acc : Stmt)
188         if (Acc->getType() == AccessTy) {
189           isl_map *Relation = Acc->getAccessRelation();
190           Relation = isl_map_intersect_domain(Relation, Stmt.getDomain());
191 
192           isl_space *Space = isl_map_get_space(Relation);
193           Space = isl_space_range(Space);
194           Space = isl_space_from_range(Space);
195           Space = isl_space_set_tuple_id(Space, isl_dim_in, Acc->getId());
196           isl_map *Universe = isl_map_universe(Space);
197           Relation = isl_map_domain_product(Relation, Universe);
198           Accesses = isl_union_map_add_map(Accesses, Relation);
199         }
200 
201     return Accesses;
202   }
203 
204   /// Get the set of all read accesses, tagged with the access id.
205   ///
206   /// @see getTaggedAccesses
207   isl_union_map *getTaggedReads() {
208     return getTaggedAccesses(MemoryAccess::READ);
209   }
210 
211   /// Get the set of all may (and must) accesses, tagged with the access id.
212   ///
213   /// @see getTaggedAccesses
214   isl_union_map *getTaggedMayWrites() {
215     return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE),
216                                getTaggedAccesses(MemoryAccess::MUST_WRITE));
217   }
218 
219   /// Get the set of all must accesses, tagged with the access id.
220   ///
221   /// @see getTaggedAccesses
222   isl_union_map *getTaggedMustWrites() {
223     return getTaggedAccesses(MemoryAccess::MUST_WRITE);
224   }
225 
226   /// Collect parameter and array names as isl_ids.
227   ///
228   /// To reason about the different parameters and arrays used, ppcg requires
229   /// a list of all isl_ids in use. As PPCG traditionally performs
230   /// source-to-source compilation each of these isl_ids is mapped to the
231   /// expression that represents it. As we do not have a corresponding
232   /// expression in Polly, we just map each id to a 'zero' expression to match
233   /// the data format that ppcg expects.
234   ///
235   /// @returns Retun a map from collected ids to 'zero' ast expressions.
236   __isl_give isl_id_to_ast_expr *getNames() {
237     auto *Names = isl_id_to_ast_expr_alloc(
238         S->getIslCtx(),
239         S->getNumParams() + std::distance(S->array_begin(), S->array_end()));
240     auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx()));
241     auto *Space = S->getParamSpace();
242 
243     for (int I = 0, E = S->getNumParams(); I < E; ++I) {
244       isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I);
245       Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
246     }
247 
248     for (auto &Array : S->arrays()) {
249       auto Id = Array.second->getBasePtrId();
250       Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero));
251     }
252 
253     isl_space_free(Space);
254     isl_ast_expr_free(Zero);
255 
256     return Names;
257   }
258 
259   /// Create a new PPCG scop from the current scop.
260   ///
261   /// The PPCG scop is initialized with data from the current polly::Scop. From
262   /// this initial data, the data-dependences in the PPCG scop are initialized.
263   /// We do not use Polly's dependence analysis for now, to ensure we match
264   /// the PPCG default behaviour more closely.
265   ///
266   /// @returns A new ppcg scop.
267   ppcg_scop *createPPCGScop() {
268     auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop));
269 
270     PPCGScop->options = createPPCGOptions();
271 
272     PPCGScop->start = 0;
273     PPCGScop->end = 0;
274 
275     PPCGScop->context = S->getContext();
276     PPCGScop->domain = S->getDomains();
277     PPCGScop->call = nullptr;
278     PPCGScop->tagged_reads = getTaggedReads();
279     PPCGScop->reads = S->getReads();
280     PPCGScop->live_in = nullptr;
281     PPCGScop->tagged_may_writes = getTaggedMayWrites();
282     PPCGScop->may_writes = S->getWrites();
283     PPCGScop->tagged_must_writes = getTaggedMustWrites();
284     PPCGScop->must_writes = S->getMustWrites();
285     PPCGScop->live_out = nullptr;
286     PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace());
287     PPCGScop->tagger = nullptr;
288 
289     PPCGScop->independence = nullptr;
290     PPCGScop->dep_flow = nullptr;
291     PPCGScop->tagged_dep_flow = nullptr;
292     PPCGScop->dep_false = nullptr;
293     PPCGScop->dep_forced = nullptr;
294     PPCGScop->dep_order = nullptr;
295     PPCGScop->tagged_dep_order = nullptr;
296 
297     PPCGScop->schedule = S->getScheduleTree();
298     PPCGScop->names = getNames();
299 
300     PPCGScop->pet = nullptr;
301 
302     compute_tagger(PPCGScop);
303     compute_dependences(PPCGScop);
304 
305     return PPCGScop;
306   }
307 
308   /// Collect the array acesses in a statement.
309   ///
310   /// @param Stmt The statement for which to collect the accesses.
311   ///
312   /// @returns A list of array accesses.
313   gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) {
314     gpu_stmt_access *Accesses = nullptr;
315 
316     for (MemoryAccess *Acc : Stmt) {
317       auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access);
318       Access->read = Acc->isRead();
319       Access->write = Acc->isWrite();
320       Access->access = Acc->getAccessRelation();
321       isl_space *Space = isl_map_get_space(Access->access);
322       Space = isl_space_range(Space);
323       Space = isl_space_from_range(Space);
324       Space = isl_space_set_tuple_id(Space, isl_dim_in, Acc->getId());
325       isl_map *Universe = isl_map_universe(Space);
326       Access->tagged_access =
327           isl_map_domain_product(Acc->getAccessRelation(), Universe);
328       Access->exact_write = Acc->isWrite();
329       Access->ref_id = Acc->getId();
330       Access->next = Accesses;
331       Accesses = Access;
332     }
333 
334     return Accesses;
335   }
336 
337   /// Collect the list of GPU statements.
338   ///
339   /// Each statement has an id, a pointer to the underlying data structure,
340   /// as well as a list with all memory accesses.
341   ///
342   /// TODO: Initialize the list of memory accesses.
343   ///
344   /// @returns A linked-list of statements.
345   gpu_stmt *getStatements() {
346     gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt,
347                                        std::distance(S->begin(), S->end()));
348 
349     int i = 0;
350     for (auto &Stmt : *S) {
351       gpu_stmt *GPUStmt = &Stmts[i];
352 
353       GPUStmt->id = Stmt.getDomainId();
354 
355       // We use the pet stmt pointer to keep track of the Polly statements.
356       GPUStmt->stmt = (pet_stmt *)&Stmt;
357       GPUStmt->accesses = getStmtAccesses(Stmt);
358       i++;
359     }
360 
361     return Stmts;
362   }
363 
364   /// Derive the extent of an array.
365   ///
366   /// The extent of an array is defined by the set of memory locations for
367   /// which a memory access in the iteration domain exists.
368   ///
369   /// @param Array The array to derive the extent for.
370   ///
371   /// @returns An isl_set describing the extent of the array.
372   __isl_give isl_set *getExtent(ScopArrayInfo *Array) {
373     isl_union_map *Accesses = S->getAccesses();
374     Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
375     isl_union_set *AccessUSet = isl_union_map_range(Accesses);
376     isl_set *AccessSet =
377         isl_union_set_extract_set(AccessUSet, Array->getSpace());
378     isl_union_set_free(AccessUSet);
379 
380     return AccessSet;
381   }
382 
383   /// Derive the bounds of an array.
384   ///
385   /// For the first dimension we derive the bound of the array from the extent
386   /// of this dimension. For inner dimensions we obtain their size directly from
387   /// ScopArrayInfo.
388   ///
389   /// @param PPCGArray The array to compute bounds for.
390   /// @param Array The polly array from which to take the information.
391   void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) {
392     if (PPCGArray.n_index > 0) {
393       isl_set *Dom = isl_set_copy(PPCGArray.extent);
394       Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1);
395       isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0);
396       isl_set_free(Dom);
397       Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound));
398       isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom));
399       isl_aff *One = isl_aff_zero_on_domain(LS);
400       One = isl_aff_add_constant_si(One, 1);
401       Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One));
402       Bound = isl_pw_aff_gist(Bound, S->getContext());
403       PPCGArray.bound[0] = Bound;
404     }
405 
406     for (unsigned i = 1; i < PPCGArray.n_index; ++i) {
407       isl_pw_aff *Bound = Array->getDimensionSizePw(i);
408       auto LS = isl_pw_aff_get_domain_space(Bound);
409       auto Aff = isl_multi_aff_zero(LS);
410       Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff);
411       PPCGArray.bound[i] = Bound;
412     }
413   }
414 
415   /// Create the arrays for @p PPCGProg.
416   ///
417   /// @param PPCGProg The program to compute the arrays for.
418   void createArrays(gpu_prog *PPCGProg) {
419     int i = 0;
420     for (auto &Element : S->arrays()) {
421       ScopArrayInfo *Array = Element.second.get();
422 
423       std::string TypeName;
424       raw_string_ostream OS(TypeName);
425 
426       OS << *Array->getElementType();
427       TypeName = OS.str();
428 
429       gpu_array_info &PPCGArray = PPCGProg->array[i];
430 
431       PPCGArray.space = Array->getSpace();
432       PPCGArray.type = strdup(TypeName.c_str());
433       PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8;
434       PPCGArray.name = strdup(Array->getName().c_str());
435       PPCGArray.extent = nullptr;
436       PPCGArray.n_index = Array->getNumberOfDimensions();
437       PPCGArray.bound =
438           isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index);
439       PPCGArray.extent = getExtent(Array);
440       PPCGArray.n_ref = 0;
441       PPCGArray.refs = nullptr;
442       PPCGArray.accessed = true;
443       PPCGArray.read_only_scalar = false;
444       PPCGArray.has_compound_element = false;
445       PPCGArray.local = false;
446       PPCGArray.declare_local = false;
447       PPCGArray.global = false;
448       PPCGArray.linearize = false;
449       PPCGArray.dep_order = nullptr;
450 
451       setArrayBounds(PPCGArray, Array);
452       i++;
453 
454       collect_references(PPCGProg, &PPCGArray);
455     }
456   }
457 
458   /// Create an identity map between the arrays in the scop.
459   ///
460   /// @returns An identity map between the arrays in the scop.
461   isl_union_map *getArrayIdentity() {
462     isl_union_map *Maps = isl_union_map_empty(S->getParamSpace());
463 
464     for (auto &Item : S->arrays()) {
465       ScopArrayInfo *Array = Item.second.get();
466       isl_space *Space = Array->getSpace();
467       Space = isl_space_map_from_set(Space);
468       isl_map *Identity = isl_map_identity(Space);
469       Maps = isl_union_map_add_map(Maps, Identity);
470     }
471 
472     return Maps;
473   }
474 
475   /// Create a default-initialized PPCG GPU program.
476   ///
477   /// @returns A new gpu grogram description.
478   gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) {
479 
480     if (!PPCGScop)
481       return nullptr;
482 
483     auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog);
484 
485     PPCGProg->ctx = S->getIslCtx();
486     PPCGProg->scop = PPCGScop;
487     PPCGProg->context = isl_set_copy(PPCGScop->context);
488     PPCGProg->read = isl_union_map_copy(PPCGScop->reads);
489     PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes);
490     PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes);
491     PPCGProg->tagged_must_kill =
492         isl_union_map_copy(PPCGScop->tagged_must_kills);
493     PPCGProg->to_inner = getArrayIdentity();
494     PPCGProg->to_outer = getArrayIdentity();
495     PPCGProg->may_persist = compute_may_persist(PPCGProg);
496     PPCGProg->any_to_outer = nullptr;
497     PPCGProg->array_order = nullptr;
498     PPCGProg->n_stmts = std::distance(S->begin(), S->end());
499     PPCGProg->stmts = getStatements();
500     PPCGProg->n_array = std::distance(S->array_begin(), S->array_end());
501     PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info,
502                                        PPCGProg->n_array);
503 
504     createArrays(PPCGProg);
505 
506     return PPCGProg;
507   }
508 
509   struct PrintGPUUserData {
510     struct cuda_info *CudaInfo;
511     struct gpu_prog *PPCGProg;
512     std::vector<ppcg_kernel *> Kernels;
513   };
514 
515   /// Print a user statement node in the host code.
516   ///
517   /// We use ppcg's printing facilities to print the actual statement and
518   /// additionally build up a list of all kernels that are encountered in the
519   /// host ast.
520   ///
521   /// @param P The printer to print to
522   /// @param Options The printing options to use
523   /// @param Node The node to print
524   /// @param User A user pointer to carry additional data. This pointer is
525   ///             expected to be of type PrintGPUUserData.
526   ///
527   /// @returns A printer to which the output has been printed.
528   static __isl_give isl_printer *
529   printHostUser(__isl_take isl_printer *P,
530                 __isl_take isl_ast_print_options *Options,
531                 __isl_take isl_ast_node *Node, void *User) {
532     auto Data = (struct PrintGPUUserData *)User;
533     auto Id = isl_ast_node_get_annotation(Node);
534 
535     if (Id) {
536       bool IsUser = !strcmp(isl_id_get_name(Id), "user");
537 
538       // If this is a user statement, format it ourselves as ppcg would
539       // otherwise try to call pet functionality that is not available in
540       // Polly.
541       if (IsUser) {
542         P = isl_printer_start_line(P);
543         P = isl_printer_print_ast_node(P, Node);
544         P = isl_printer_end_line(P);
545         isl_id_free(Id);
546         isl_ast_print_options_free(Options);
547         return P;
548       }
549 
550       auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id);
551       isl_id_free(Id);
552       Data->Kernels.push_back(Kernel);
553     }
554 
555     return print_host_user(P, Options, Node, User);
556   }
557 
558   /// Print C code corresponding to the control flow in @p Kernel.
559   ///
560   /// @param Kernel The kernel to print
561   void printKernel(ppcg_kernel *Kernel) {
562     auto *P = isl_printer_to_str(S->getIslCtx());
563     P = isl_printer_set_output_format(P, ISL_FORMAT_C);
564     auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
565     P = isl_ast_node_print(Kernel->tree, P, Options);
566     char *String = isl_printer_get_str(P);
567     printf("%s\n", String);
568     free(String);
569     isl_printer_free(P);
570   }
571 
572   /// Print C code corresponding to the GPU code described by @p Tree.
573   ///
574   /// @param Tree An AST describing GPU code
575   /// @param PPCGProg The PPCG program from which @Tree has been constructed.
576   void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) {
577     auto *P = isl_printer_to_str(S->getIslCtx());
578     P = isl_printer_set_output_format(P, ISL_FORMAT_C);
579 
580     PrintGPUUserData Data;
581     Data.PPCGProg = PPCGProg;
582 
583     auto *Options = isl_ast_print_options_alloc(S->getIslCtx());
584     Options =
585         isl_ast_print_options_set_print_user(Options, printHostUser, &Data);
586     P = isl_ast_node_print(Tree, P, Options);
587     char *String = isl_printer_get_str(P);
588     printf("# host\n");
589     printf("%s\n", String);
590     free(String);
591     isl_printer_free(P);
592 
593     for (auto Kernel : Data.Kernels) {
594       printf("# kernel%d\n", Kernel->id);
595       printKernel(Kernel);
596     }
597   }
598 
599   // Generate a GPU program using PPCG.
600   //
601   // GPU mapping consists of multiple steps:
602   //
603   //  1) Compute new schedule for the program.
604   //  2) Map schedule to GPU (TODO)
605   //  3) Generate code for new schedule (TODO)
606   //
607   // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer
608   // is mostly CPU specific. Instead, we use PPCG's GPU code generation
609   // strategy directly from this pass.
610   gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) {
611 
612     auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);
613 
614     PPCGGen->ctx = S->getIslCtx();
615     PPCGGen->options = PPCGScop->options;
616     PPCGGen->print = nullptr;
617     PPCGGen->print_user = nullptr;
618     PPCGGen->build_ast_expr = &pollyBuildAstExprForStmt;
619     PPCGGen->prog = PPCGProg;
620     PPCGGen->tree = nullptr;
621     PPCGGen->types.n = 0;
622     PPCGGen->types.name = nullptr;
623     PPCGGen->sizes = nullptr;
624     PPCGGen->used_sizes = nullptr;
625     PPCGGen->kernel_id = 0;
626 
627     // Set scheduling strategy to same strategy PPCG is using.
628     isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true);
629     isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true);
630     isl_options_set_schedule_whole_component(PPCGGen->ctx, false);
631 
632     isl_schedule *Schedule = get_schedule(PPCGGen);
633 
634     int has_permutable = has_any_permutable_node(Schedule);
635 
636     if (!has_permutable || has_permutable < 0) {
637       Schedule = isl_schedule_free(Schedule);
638     } else {
639       Schedule = map_to_device(PPCGGen, Schedule);
640       PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule));
641     }
642 
643     if (DumpSchedule) {
644       isl_printer *P = isl_printer_to_str(S->getIslCtx());
645       P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK);
646       P = isl_printer_print_str(P, "Schedule\n");
647       P = isl_printer_print_str(P, "========\n");
648       if (Schedule)
649         P = isl_printer_print_schedule(P, Schedule);
650       else
651         P = isl_printer_print_str(P, "No schedule found\n");
652 
653       printf("%s\n", isl_printer_get_str(P));
654       isl_printer_free(P);
655     }
656 
657     if (DumpCode) {
658       printf("Code\n");
659       printf("====\n");
660       if (PPCGGen->tree)
661         printGPUTree(PPCGGen->tree, PPCGProg);
662       else
663         printf("No code generated\n");
664     }
665 
666     isl_schedule_free(Schedule);
667 
668     return PPCGGen;
669   }
670 
671   /// Free gpu_gen structure.
672   ///
673   /// @param PPCGGen The ppcg_gen object to free.
674   void freePPCGGen(gpu_gen *PPCGGen) {
675     isl_ast_node_free(PPCGGen->tree);
676     isl_union_map_free(PPCGGen->sizes);
677     isl_union_map_free(PPCGGen->used_sizes);
678     free(PPCGGen);
679   }
680 
681   /// Free the options in the ppcg scop structure.
682   ///
683   /// ppcg is not freeing these options for us. To avoid leaks we do this
684   /// ourselves.
685   ///
686   /// @param PPCGScop The scop referencing the options to free.
687   void freeOptions(ppcg_scop *PPCGScop) {
688     free(PPCGScop->options->debug);
689     PPCGScop->options->debug = nullptr;
690     free(PPCGScop->options);
691     PPCGScop->options = nullptr;
692   }
693 
694   /// Generate code for a given GPU AST described by @p Root.
695   ///
696   /// @param An isl_ast_node pointing to the root of the GPU AST.
697   void generateCode(__isl_take isl_ast_node *Root) {
698     ScopAnnotator Annotator;
699     Annotator.buildAliasScopes(*S);
700 
701     Region *R = &S->getRegion();
702 
703     simplifyRegion(R, DT, LI, RI);
704 
705     BasicBlock *EnteringBB = R->getEnteringBlock();
706 
707     PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
708 
709     GPUNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT,
710                                *S);
711 
712     // Only build the run-time condition and parameters _after_ having
713     // introduced the conditional branch. This is important as the conditional
714     // branch will guard the original scop from new induction variables that
715     // the SCEVExpander may introduce while code generating the parameters and
716     // which may introduce scalar dependences that prevent us from correctly
717     // code generating this scop.
718     BasicBlock *StartBlock =
719         executeScopConditionally(*S, this, Builder.getTrue());
720 
721     // TODO: Handle LICM
722     // TODO: Verify run-time checks
723     auto SplitBlock = StartBlock->getSinglePredecessor();
724     Builder.SetInsertPoint(SplitBlock->getTerminator());
725     NodeBuilder.addParameters(S->getContext());
726     Builder.SetInsertPoint(&*StartBlock->begin());
727     NodeBuilder.create(Root);
728     NodeBuilder.finalizeSCoP(*S);
729   }
730 
731   bool runOnScop(Scop &CurrentScop) override {
732     S = &CurrentScop;
733     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
734     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
735     SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
736     DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout();
737     RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
738 
739     auto PPCGScop = createPPCGScop();
740     auto PPCGProg = createPPCGProg(PPCGScop);
741     auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
742 
743     if (PPCGGen->tree)
744       generateCode(isl_ast_node_copy(PPCGGen->tree));
745 
746     freeOptions(PPCGScop);
747     freePPCGGen(PPCGGen);
748     gpu_prog_free(PPCGProg);
749     ppcg_scop_free(PPCGScop);
750 
751     return true;
752   }
753 
754   void printScop(raw_ostream &, Scop &) const override {}
755 
756   void getAnalysisUsage(AnalysisUsage &AU) const override {
757     AU.addRequired<DominatorTreeWrapperPass>();
758     AU.addRequired<RegionInfoPass>();
759     AU.addRequired<ScalarEvolutionWrapperPass>();
760     AU.addRequired<ScopDetection>();
761     AU.addRequired<ScopInfoRegionPass>();
762     AU.addRequired<LoopInfoWrapperPass>();
763 
764     AU.addPreserved<AAResultsWrapperPass>();
765     AU.addPreserved<BasicAAWrapperPass>();
766     AU.addPreserved<LoopInfoWrapperPass>();
767     AU.addPreserved<DominatorTreeWrapperPass>();
768     AU.addPreserved<GlobalsAAWrapperPass>();
769     AU.addPreserved<PostDominatorTreeWrapperPass>();
770     AU.addPreserved<ScopDetection>();
771     AU.addPreserved<ScalarEvolutionWrapperPass>();
772     AU.addPreserved<SCEVAAWrapperPass>();
773 
774     // FIXME: We do not yet add regions for the newly generated code to the
775     //        region tree.
776     AU.addPreserved<RegionInfoPass>();
777     AU.addPreserved<ScopInfoRegionPass>();
778   }
779 };
780 }
781 
782 char PPCGCodeGeneration::ID = 1;
783 
784 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); }
785 
786 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg",
787                       "Polly - Apply PPCG translation to SCOP", false, false)
788 INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
789 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
790 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
791 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
792 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
793 INITIALIZE_PASS_DEPENDENCY(ScopDetection);
794 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg",
795                     "Polly - Apply PPCG translation to SCOP", false, false)
796