1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg 11 // GPU mapping strategy. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/DependenceInfo.h" 17 #include "polly/LinkAllPasses.h" 18 #include "polly/Options.h" 19 #include "polly/ScopInfo.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BasicAliasAnalysis.h" 22 #include "llvm/Analysis/GlobalsModRef.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 26 #include "isl/union_map.h" 27 28 extern "C" { 29 #include "gpu.h" 30 #include "ppcg.h" 31 } 32 33 #include "llvm/Support/Debug.h" 34 35 using namespace polly; 36 using namespace llvm; 37 38 #define DEBUG_TYPE "polly-codegen-ppcg" 39 40 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41 cl::desc("Dump the computed GPU Schedule"), 42 cl::Hidden, cl::init(false), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 /// Create the ast expressions for a ScopStmt. 45 /// 46 /// This function is a callback for to generate the ast expressions for each 47 /// of the scheduled ScopStmts. 48 static __isl_give isl_id_to_ast_expr *pollyBuildAstExprForStmt( 49 void *Stmt, isl_ast_build *Build, 50 isl_multi_pw_aff *(*FunctionIndex)(__isl_take isl_multi_pw_aff *MPA, 51 isl_id *Id, void *User), 52 void *UserIndex, 53 isl_ast_expr *(*FunctionExpr)(isl_ast_expr *Expr, isl_id *Id, void *User), 54 void *User_expr) { 55 56 // TODO: Implement the AST expression generation. For now we just return a 57 // nullptr to ensure that we do not free uninitialized pointers. 58 59 return nullptr; 60 } 61 62 namespace { 63 class PPCGCodeGeneration : public ScopPass { 64 public: 65 static char ID; 66 67 /// The scop that is currently processed. 68 Scop *S; 69 70 PPCGCodeGeneration() : ScopPass(ID) {} 71 72 /// Construct compilation options for PPCG. 73 /// 74 /// @returns The compilation options. 75 ppcg_options *createPPCGOptions() { 76 auto DebugOptions = 77 (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 78 auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 79 80 DebugOptions->dump_schedule_constraints = false; 81 DebugOptions->dump_schedule = false; 82 DebugOptions->dump_final_schedule = false; 83 DebugOptions->dump_sizes = false; 84 85 Options->debug = DebugOptions; 86 87 Options->reschedule = true; 88 Options->scale_tile_loops = false; 89 Options->wrap = false; 90 91 Options->non_negative_parameters = false; 92 Options->ctx = nullptr; 93 Options->sizes = nullptr; 94 95 Options->tile_size = 32; 96 97 Options->use_private_memory = false; 98 Options->use_shared_memory = false; 99 Options->max_shared_memory = 0; 100 101 Options->target = PPCG_TARGET_CUDA; 102 Options->openmp = false; 103 Options->linearize_device_arrays = true; 104 Options->live_range_reordering = false; 105 106 Options->opencl_compiler_options = nullptr; 107 Options->opencl_use_gpu = false; 108 Options->opencl_n_include_file = 0; 109 Options->opencl_include_files = nullptr; 110 Options->opencl_print_kernel_types = false; 111 Options->opencl_embed_kernel_code = false; 112 113 Options->save_schedule_file = nullptr; 114 Options->load_schedule_file = nullptr; 115 116 return Options; 117 } 118 119 /// Get a tagged access relation containing all accesses of type @p AccessTy. 120 /// 121 /// Instead of a normal access of the form: 122 /// 123 /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 124 /// 125 /// a tagged access has the form 126 /// 127 /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 128 /// 129 /// where 'id' is an additional space that references the memory access that 130 /// triggered the access. 131 /// 132 /// @param AccessTy The type of the memory accesses to collect. 133 /// 134 /// @return The relation describing all tagged memory accesses. 135 isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 136 isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 137 138 for (auto &Stmt : *S) 139 for (auto &Acc : Stmt) 140 if (Acc->getType() == AccessTy) { 141 isl_map *Relation = Acc->getAccessRelation(); 142 Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 143 144 isl_space *Space = isl_map_get_space(Relation); 145 Space = isl_space_range(Space); 146 Space = isl_space_from_range(Space); 147 isl_map *Universe = isl_map_universe(Space); 148 Relation = isl_map_domain_product(Relation, Universe); 149 Accesses = isl_union_map_add_map(Accesses, Relation); 150 } 151 152 return Accesses; 153 } 154 155 /// Get the set of all read accesses, tagged with the access id. 156 /// 157 /// @see getTaggedAccesses 158 isl_union_map *getTaggedReads() { 159 return getTaggedAccesses(MemoryAccess::READ); 160 } 161 162 /// Get the set of all may (and must) accesses, tagged with the access id. 163 /// 164 /// @see getTaggedAccesses 165 isl_union_map *getTaggedMayWrites() { 166 return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 167 getTaggedAccesses(MemoryAccess::MUST_WRITE)); 168 } 169 170 /// Get the set of all must accesses, tagged with the access id. 171 /// 172 /// @see getTaggedAccesses 173 isl_union_map *getTaggedMustWrites() { 174 return getTaggedAccesses(MemoryAccess::MUST_WRITE); 175 } 176 177 /// Collect parameter and array names as isl_ids. 178 /// 179 /// To reason about the different parameters and arrays used, ppcg requires 180 /// a list of all isl_ids in use. As PPCG traditionally performs 181 /// source-to-source compilation each of these isl_ids is mapped to the 182 /// expression that represents it. As we do not have a corresponding 183 /// expression in Polly, we just map each id to a 'zero' expression to match 184 /// the data format that ppcg expects. 185 /// 186 /// @returns Retun a map from collected ids to 'zero' ast expressions. 187 __isl_give isl_id_to_ast_expr *getNames() { 188 auto *Names = isl_id_to_ast_expr_alloc( 189 S->getIslCtx(), 190 S->getNumParams() + std::distance(S->array_begin(), S->array_end())); 191 auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); 192 auto *Space = S->getParamSpace(); 193 194 for (int I = 0, E = S->getNumParams(); I < E; ++I) { 195 isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); 196 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 197 } 198 199 for (auto &Array : S->arrays()) { 200 auto Id = Array.second->getBasePtrId(); 201 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 202 } 203 204 isl_space_free(Space); 205 isl_ast_expr_free(Zero); 206 207 return Names; 208 } 209 210 /// Create a new PPCG scop from the current scop. 211 /// 212 /// The PPCG scop is initialized with data from the current polly::Scop. From 213 /// this initial data, the data-dependences in the PPCG scop are initialized. 214 /// We do not use Polly's dependence analysis for now, to ensure we match 215 /// the PPCG default behaviour more closely. 216 /// 217 /// @returns A new ppcg scop. 218 ppcg_scop *createPPCGScop() { 219 auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 220 221 PPCGScop->options = createPPCGOptions(); 222 223 PPCGScop->start = 0; 224 PPCGScop->end = 0; 225 226 PPCGScop->context = S->getContext(); 227 PPCGScop->domain = S->getDomains(); 228 PPCGScop->call = nullptr; 229 PPCGScop->tagged_reads = getTaggedReads(); 230 PPCGScop->reads = S->getReads(); 231 PPCGScop->live_in = nullptr; 232 PPCGScop->tagged_may_writes = getTaggedMayWrites(); 233 PPCGScop->may_writes = S->getWrites(); 234 PPCGScop->tagged_must_writes = getTaggedMustWrites(); 235 PPCGScop->must_writes = S->getMustWrites(); 236 PPCGScop->live_out = nullptr; 237 PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 238 PPCGScop->tagger = nullptr; 239 240 PPCGScop->independence = nullptr; 241 PPCGScop->dep_flow = nullptr; 242 PPCGScop->tagged_dep_flow = nullptr; 243 PPCGScop->dep_false = nullptr; 244 PPCGScop->dep_forced = nullptr; 245 PPCGScop->dep_order = nullptr; 246 PPCGScop->tagged_dep_order = nullptr; 247 248 PPCGScop->schedule = S->getScheduleTree(); 249 PPCGScop->names = getNames(); 250 251 PPCGScop->pet = nullptr; 252 253 compute_tagger(PPCGScop); 254 compute_dependences(PPCGScop); 255 256 return PPCGScop; 257 } 258 259 /// Create a default-initialized PPCG GPU program. 260 /// 261 /// @returns A new gpu grogram description. 262 gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 263 264 if (!PPCGScop) 265 return nullptr; 266 267 auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 268 269 PPCGProg->ctx = S->getIslCtx(); 270 PPCGProg->scop = PPCGScop; 271 PPCGProg->context = isl_set_copy(PPCGScop->context); 272 PPCGProg->read = nullptr; 273 PPCGProg->may_write = nullptr; 274 PPCGProg->must_write = nullptr; 275 PPCGProg->tagged_must_kill = nullptr; 276 PPCGProg->may_persist = nullptr; 277 PPCGProg->to_outer = nullptr; 278 PPCGProg->to_inner = nullptr; 279 PPCGProg->any_to_outer = nullptr; 280 PPCGProg->array_order = nullptr; 281 PPCGProg->n_stmts = 0; 282 PPCGProg->stmts = nullptr; 283 PPCGProg->n_array = 0; 284 PPCGProg->array = nullptr; 285 286 return PPCGProg; 287 } 288 289 // Generate a GPU program using PPCG. 290 // 291 // GPU mapping consists of multiple steps: 292 // 293 // 1) Compute new schedule for the program. 294 // 2) Map schedule to GPU (TODO) 295 // 3) Generate code for new schedule (TODO) 296 // 297 // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 298 // is mostly CPU specific. Instead, we use PPCG's GPU code generation 299 // strategy directly from this pass. 300 gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 301 302 auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 303 304 PPCGGen->ctx = S->getIslCtx(); 305 PPCGGen->options = PPCGScop->options; 306 PPCGGen->print = nullptr; 307 PPCGGen->print_user = nullptr; 308 PPCGGen->build_ast_expr = &pollyBuildAstExprForStmt; 309 PPCGGen->prog = PPCGProg; 310 PPCGGen->tree = nullptr; 311 PPCGGen->types.n = 0; 312 PPCGGen->types.name = nullptr; 313 PPCGGen->sizes = nullptr; 314 PPCGGen->used_sizes = nullptr; 315 PPCGGen->kernel_id = 0; 316 317 // Set scheduling strategy to same strategy PPCG is using. 318 isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 319 isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 320 321 isl_schedule *Schedule = get_schedule(PPCGGen); 322 323 int has_permutable = has_any_permutable_node(Schedule); 324 325 if (!has_permutable || has_permutable < 0) 326 Schedule = isl_schedule_free(Schedule); 327 else 328 Schedule = map_to_device(PPCGGen, Schedule); 329 330 if (DumpSchedule) { 331 isl_printer *P = isl_printer_to_str(S->getIslCtx()); 332 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 333 P = isl_printer_print_str(P, "Schedule\n"); 334 P = isl_printer_print_str(P, "========\n"); 335 if (Schedule) 336 P = isl_printer_print_schedule(P, Schedule); 337 else 338 P = isl_printer_print_str(P, "No schedule found\n"); 339 340 printf("%s\n", isl_printer_get_str(P)); 341 isl_printer_free(P); 342 } 343 344 isl_schedule_free(Schedule); 345 346 return PPCGGen; 347 } 348 349 /// Free gpu_gen structure. 350 /// 351 /// @param PPCGGen The ppcg_gen object to free. 352 void freePPCGGen(gpu_gen *PPCGGen) { 353 isl_ast_node_free(PPCGGen->tree); 354 isl_union_map_free(PPCGGen->sizes); 355 isl_union_map_free(PPCGGen->used_sizes); 356 free(PPCGGen); 357 } 358 359 bool runOnScop(Scop &CurrentScop) override { 360 S = &CurrentScop; 361 362 auto PPCGScop = createPPCGScop(); 363 auto PPCGProg = createPPCGProg(PPCGScop); 364 auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 365 freePPCGGen(PPCGGen); 366 gpu_prog_free(PPCGProg); 367 ppcg_scop_free(PPCGScop); 368 369 return true; 370 } 371 372 void printScop(raw_ostream &, Scop &) const override {} 373 374 void getAnalysisUsage(AnalysisUsage &AU) const override { 375 AU.addRequired<DominatorTreeWrapperPass>(); 376 AU.addRequired<RegionInfoPass>(); 377 AU.addRequired<ScalarEvolutionWrapperPass>(); 378 AU.addRequired<ScopDetection>(); 379 AU.addRequired<ScopInfoRegionPass>(); 380 AU.addRequired<LoopInfoWrapperPass>(); 381 382 AU.addPreserved<AAResultsWrapperPass>(); 383 AU.addPreserved<BasicAAWrapperPass>(); 384 AU.addPreserved<LoopInfoWrapperPass>(); 385 AU.addPreserved<DominatorTreeWrapperPass>(); 386 AU.addPreserved<GlobalsAAWrapperPass>(); 387 AU.addPreserved<PostDominatorTreeWrapperPass>(); 388 AU.addPreserved<ScopDetection>(); 389 AU.addPreserved<ScalarEvolutionWrapperPass>(); 390 AU.addPreserved<SCEVAAWrapperPass>(); 391 392 // FIXME: We do not yet add regions for the newly generated code to the 393 // region tree. 394 AU.addPreserved<RegionInfoPass>(); 395 AU.addPreserved<ScopInfoRegionPass>(); 396 } 397 }; 398 } 399 400 char PPCGCodeGeneration::ID = 1; 401 402 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 403 404 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 405 "Polly - Apply PPCG translation to SCOP", false, false) 406 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 407 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 408 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 409 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 410 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 411 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 412 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 413 "Polly - Apply PPCG translation to SCOP", false, false) 414