1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg 11 // GPU mapping strategy. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/DependenceInfo.h" 17 #include "polly/LinkAllPasses.h" 18 #include "polly/Options.h" 19 #include "polly/ScopInfo.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BasicAliasAnalysis.h" 22 #include "llvm/Analysis/GlobalsModRef.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 26 #include "isl/union_map.h" 27 28 extern "C" { 29 #include "gpu.h" 30 #include "ppcg.h" 31 } 32 33 #include "llvm/Support/Debug.h" 34 35 using namespace polly; 36 using namespace llvm; 37 38 #define DEBUG_TYPE "polly-codegen-ppcg" 39 40 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41 cl::desc("Dump the computed GPU Schedule"), 42 cl::Hidden, cl::init(false), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 45 namespace { 46 class PPCGCodeGeneration : public ScopPass { 47 public: 48 static char ID; 49 50 /// The scop that is currently processed. 51 Scop *S; 52 53 PPCGCodeGeneration() : ScopPass(ID) {} 54 55 /// Construct compilation options for PPCG. 56 /// 57 /// @returns The compilation options. 58 ppcg_options *createPPCGOptions() { 59 auto DebugOptions = 60 (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 61 auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 62 63 DebugOptions->dump_schedule_constraints = false; 64 DebugOptions->dump_schedule = false; 65 DebugOptions->dump_final_schedule = false; 66 DebugOptions->dump_sizes = false; 67 68 Options->debug = DebugOptions; 69 70 Options->reschedule = true; 71 Options->scale_tile_loops = false; 72 Options->wrap = false; 73 74 Options->non_negative_parameters = false; 75 Options->ctx = nullptr; 76 Options->sizes = nullptr; 77 78 Options->tile_size = 32; 79 80 Options->use_private_memory = false; 81 Options->use_shared_memory = false; 82 Options->max_shared_memory = 0; 83 84 Options->target = PPCG_TARGET_CUDA; 85 Options->openmp = false; 86 Options->linearize_device_arrays = true; 87 Options->live_range_reordering = false; 88 89 Options->opencl_compiler_options = nullptr; 90 Options->opencl_use_gpu = false; 91 Options->opencl_n_include_file = 0; 92 Options->opencl_include_files = nullptr; 93 Options->opencl_print_kernel_types = false; 94 Options->opencl_embed_kernel_code = false; 95 96 Options->save_schedule_file = nullptr; 97 Options->load_schedule_file = nullptr; 98 99 return Options; 100 } 101 102 /// Get a tagged access relation containing all accesses of type @p AccessTy. 103 /// 104 /// Instead of a normal access of the form: 105 /// 106 /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 107 /// 108 /// a tagged access has the form 109 /// 110 /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 111 /// 112 /// where 'id' is an additional space that references the memory access that 113 /// triggered the access. 114 /// 115 /// @param AccessTy The type of the memory accesses to collect. 116 /// 117 /// @return The relation describing all tagged memory accesses. 118 isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 119 isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 120 121 for (auto &Stmt : *S) 122 for (auto &Acc : Stmt) 123 if (Acc->getType() == AccessTy) { 124 isl_map *Relation = Acc->getAccessRelation(); 125 Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 126 127 isl_space *Space = isl_map_get_space(Relation); 128 Space = isl_space_range(Space); 129 Space = isl_space_from_range(Space); 130 isl_map *Universe = isl_map_universe(Space); 131 Relation = isl_map_domain_product(Relation, Universe); 132 Accesses = isl_union_map_add_map(Accesses, Relation); 133 } 134 135 return Accesses; 136 } 137 138 /// Get the set of all read accesses, tagged with the access id. 139 /// 140 /// @see getTaggedAccesses 141 isl_union_map *getTaggedReads() { 142 return getTaggedAccesses(MemoryAccess::READ); 143 } 144 145 /// Get the set of all may (and must) accesses, tagged with the access id. 146 /// 147 /// @see getTaggedAccesses 148 isl_union_map *getTaggedMayWrites() { 149 return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 150 getTaggedAccesses(MemoryAccess::MUST_WRITE)); 151 } 152 153 /// Get the set of all must accesses, tagged with the access id. 154 /// 155 /// @see getTaggedAccesses 156 isl_union_map *getTaggedMustWrites() { 157 return getTaggedAccesses(MemoryAccess::MUST_WRITE); 158 } 159 160 /// Collect parameter and array names as isl_ids. 161 /// 162 /// To reason about the different parameters and arrays used, ppcg requires 163 /// a list of all isl_ids in use. As PPCG traditionally performs 164 /// source-to-source compilation each of these isl_ids is mapped to the 165 /// expression that represents it. As we do not have a corresponding 166 /// expression in Polly, we just map each id to a 'zero' expression to match 167 /// the data format that ppcg expects. 168 /// 169 /// @returns Retun a map from collected ids to 'zero' ast expressions. 170 __isl_give isl_id_to_ast_expr *getNames() { 171 auto *Names = isl_id_to_ast_expr_alloc( 172 S->getIslCtx(), 173 S->getNumParams() + std::distance(S->array_begin(), S->array_end())); 174 auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); 175 auto *Space = S->getParamSpace(); 176 177 for (int I = 0, E = S->getNumParams(); I < E; ++I) { 178 isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); 179 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 180 } 181 182 for (auto &Array : S->arrays()) { 183 auto Id = Array.second->getBasePtrId(); 184 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 185 } 186 187 isl_space_free(Space); 188 isl_ast_expr_free(Zero); 189 190 return Names; 191 } 192 193 /// Create a new PPCG scop from the current scop. 194 /// 195 /// The PPCG scop is initialized with data from the current polly::Scop. From 196 /// this initial data, the data-dependences in the PPCG scop are initialized. 197 /// We do not use Polly's dependence analysis for now, to ensure we match 198 /// the PPCG default behaviour more closely. 199 /// 200 /// @returns A new ppcg scop. 201 ppcg_scop *createPPCGScop() { 202 auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 203 204 PPCGScop->options = createPPCGOptions(); 205 206 PPCGScop->start = 0; 207 PPCGScop->end = 0; 208 209 PPCGScop->context = S->getContext(); 210 PPCGScop->domain = S->getDomains(); 211 PPCGScop->call = nullptr; 212 PPCGScop->tagged_reads = getTaggedReads(); 213 PPCGScop->reads = S->getReads(); 214 PPCGScop->live_in = nullptr; 215 PPCGScop->tagged_may_writes = getTaggedMayWrites(); 216 PPCGScop->may_writes = S->getWrites(); 217 PPCGScop->tagged_must_writes = getTaggedMustWrites(); 218 PPCGScop->must_writes = S->getMustWrites(); 219 PPCGScop->live_out = nullptr; 220 PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 221 PPCGScop->tagger = nullptr; 222 223 PPCGScop->independence = nullptr; 224 PPCGScop->dep_flow = nullptr; 225 PPCGScop->tagged_dep_flow = nullptr; 226 PPCGScop->dep_false = nullptr; 227 PPCGScop->dep_forced = nullptr; 228 PPCGScop->dep_order = nullptr; 229 PPCGScop->tagged_dep_order = nullptr; 230 231 PPCGScop->schedule = S->getScheduleTree(); 232 PPCGScop->names = getNames(); 233 234 PPCGScop->pet = nullptr; 235 236 compute_tagger(PPCGScop); 237 compute_dependences(PPCGScop); 238 239 return PPCGScop; 240 } 241 242 /// Create a default-initialized PPCG GPU program. 243 /// 244 /// @returns A new gpu grogram description. 245 gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 246 247 if (!PPCGScop) 248 return nullptr; 249 250 auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 251 252 PPCGProg->ctx = S->getIslCtx(); 253 PPCGProg->scop = PPCGScop; 254 PPCGProg->context = isl_set_copy(PPCGScop->context); 255 PPCGProg->read = nullptr; 256 PPCGProg->may_write = nullptr; 257 PPCGProg->must_write = nullptr; 258 PPCGProg->tagged_must_kill = nullptr; 259 PPCGProg->may_persist = nullptr; 260 PPCGProg->to_outer = nullptr; 261 PPCGProg->to_inner = nullptr; 262 PPCGProg->any_to_outer = nullptr; 263 PPCGProg->array_order = nullptr; 264 PPCGProg->n_stmts = 0; 265 PPCGProg->stmts = nullptr; 266 PPCGProg->n_array = 0; 267 PPCGProg->array = nullptr; 268 269 return PPCGProg; 270 } 271 272 // Generate a GPU program using PPCG. 273 // 274 // GPU mapping consists of multiple steps: 275 // 276 // 1) Compute new schedule for the program. 277 // 2) Map schedule to GPU (TODO) 278 // 3) Generate code for new schedule (TODO) 279 // 280 // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 281 // is mostly CPU specific. Instead, we use PPCG's GPU code generation 282 // strategy directly from this pass. 283 gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 284 285 auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 286 287 PPCGGen->ctx = S->getIslCtx(); 288 PPCGGen->options = PPCGScop->options; 289 PPCGGen->print = nullptr; 290 PPCGGen->print_user = nullptr; 291 PPCGGen->prog = PPCGProg; 292 PPCGGen->tree = nullptr; 293 PPCGGen->types.n = 0; 294 PPCGGen->types.name = nullptr; 295 PPCGGen->sizes = nullptr; 296 PPCGGen->used_sizes = nullptr; 297 PPCGGen->kernel_id = 0; 298 299 // Set scheduling strategy to same strategy PPCG is using. 300 isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 301 isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 302 303 isl_schedule *Schedule = get_schedule(PPCGGen); 304 305 int has_permutable = has_any_permutable_node(Schedule); 306 307 if (!has_permutable || has_permutable < 0) 308 Schedule = isl_schedule_free(Schedule); 309 else 310 Schedule = map_to_device(PPCGGen, Schedule); 311 312 if (DumpSchedule) { 313 isl_printer *P = isl_printer_to_str(S->getIslCtx()); 314 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 315 P = isl_printer_print_str(P, "Schedule\n"); 316 P = isl_printer_print_str(P, "========\n"); 317 if (Schedule) 318 P = isl_printer_print_schedule(P, Schedule); 319 else 320 P = isl_printer_print_str(P, "No schedule found\n"); 321 322 printf("%s\n", isl_printer_get_str(P)); 323 isl_printer_free(P); 324 } 325 326 isl_schedule_free(Schedule); 327 328 return PPCGGen; 329 } 330 331 /// Free gpu_gen structure. 332 /// 333 /// @param PPCGGen The ppcg_gen object to free. 334 void freePPCGGen(gpu_gen *PPCGGen) { 335 isl_ast_node_free(PPCGGen->tree); 336 isl_union_map_free(PPCGGen->sizes); 337 isl_union_map_free(PPCGGen->used_sizes); 338 free(PPCGGen); 339 } 340 341 bool runOnScop(Scop &CurrentScop) override { 342 S = &CurrentScop; 343 344 auto PPCGScop = createPPCGScop(); 345 auto PPCGProg = createPPCGProg(PPCGScop); 346 auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 347 freePPCGGen(PPCGGen); 348 gpu_prog_free(PPCGProg); 349 ppcg_scop_free(PPCGScop); 350 351 return true; 352 } 353 354 void printScop(raw_ostream &, Scop &) const override {} 355 356 void getAnalysisUsage(AnalysisUsage &AU) const override { 357 AU.addRequired<DominatorTreeWrapperPass>(); 358 AU.addRequired<RegionInfoPass>(); 359 AU.addRequired<ScalarEvolutionWrapperPass>(); 360 AU.addRequired<ScopDetection>(); 361 AU.addRequired<ScopInfoRegionPass>(); 362 AU.addRequired<LoopInfoWrapperPass>(); 363 364 AU.addPreserved<AAResultsWrapperPass>(); 365 AU.addPreserved<BasicAAWrapperPass>(); 366 AU.addPreserved<LoopInfoWrapperPass>(); 367 AU.addPreserved<DominatorTreeWrapperPass>(); 368 AU.addPreserved<GlobalsAAWrapperPass>(); 369 AU.addPreserved<PostDominatorTreeWrapperPass>(); 370 AU.addPreserved<ScopDetection>(); 371 AU.addPreserved<ScalarEvolutionWrapperPass>(); 372 AU.addPreserved<SCEVAAWrapperPass>(); 373 374 // FIXME: We do not yet add regions for the newly generated code to the 375 // region tree. 376 AU.addPreserved<RegionInfoPass>(); 377 AU.addPreserved<ScopInfoRegionPass>(); 378 } 379 }; 380 } 381 382 char PPCGCodeGeneration::ID = 1; 383 384 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 385 386 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 387 "Polly - Apply PPCG translation to SCOP", false, false) 388 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 389 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 390 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 391 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 392 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 393 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 394 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 395 "Polly - Apply PPCG translation to SCOP", false, false) 396