1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg 11 // GPU mapping strategy. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/DependenceInfo.h" 17 #include "polly/LinkAllPasses.h" 18 #include "polly/Options.h" 19 #include "polly/ScopInfo.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BasicAliasAnalysis.h" 22 #include "llvm/Analysis/GlobalsModRef.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 26 #include "isl/union_map.h" 27 28 extern "C" { 29 #include "gpu.h" 30 #include "ppcg.h" 31 } 32 33 #include "llvm/Support/Debug.h" 34 35 using namespace polly; 36 using namespace llvm; 37 38 #define DEBUG_TYPE "polly-codegen-ppcg" 39 40 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41 cl::desc("Dump the computed GPU Schedule"), 42 cl::Hidden, cl::init(false), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 45 namespace { 46 class PPCGCodeGeneration : public ScopPass { 47 public: 48 static char ID; 49 50 /// The scop that is currently processed. 51 Scop *S; 52 53 PPCGCodeGeneration() : ScopPass(ID) {} 54 55 /// Construct compilation options for PPCG. 56 /// 57 /// @returns The compilation options. 58 ppcg_options *createPPCGOptions() { 59 auto DebugOptions = 60 (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 61 auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 62 63 DebugOptions->dump_schedule_constraints = false; 64 DebugOptions->dump_schedule = false; 65 DebugOptions->dump_final_schedule = false; 66 DebugOptions->dump_sizes = false; 67 68 Options->debug = DebugOptions; 69 70 Options->reschedule = true; 71 Options->scale_tile_loops = false; 72 Options->wrap = false; 73 74 Options->non_negative_parameters = false; 75 Options->ctx = nullptr; 76 Options->sizes = nullptr; 77 78 Options->use_private_memory = false; 79 Options->use_shared_memory = false; 80 Options->max_shared_memory = 0; 81 82 Options->target = PPCG_TARGET_CUDA; 83 Options->openmp = false; 84 Options->linearize_device_arrays = true; 85 Options->live_range_reordering = false; 86 87 Options->opencl_compiler_options = nullptr; 88 Options->opencl_use_gpu = false; 89 Options->opencl_n_include_file = 0; 90 Options->opencl_include_files = nullptr; 91 Options->opencl_print_kernel_types = false; 92 Options->opencl_embed_kernel_code = false; 93 94 Options->save_schedule_file = nullptr; 95 Options->load_schedule_file = nullptr; 96 97 return Options; 98 } 99 100 /// Get a tagged access relation containing all accesses of type @p AccessTy. 101 /// 102 /// Instead of a normal access of the form: 103 /// 104 /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 105 /// 106 /// a tagged access has the form 107 /// 108 /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 109 /// 110 /// where 'id' is an additional space that references the memory access that 111 /// triggered the access. 112 /// 113 /// @param AccessTy The type of the memory accesses to collect. 114 /// 115 /// @return The relation describing all tagged memory accesses. 116 isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 117 isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 118 119 for (auto &Stmt : *S) 120 for (auto &Acc : Stmt) 121 if (Acc->getType() == AccessTy) { 122 isl_map *Relation = Acc->getAccessRelation(); 123 Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 124 125 isl_space *Space = isl_map_get_space(Relation); 126 Space = isl_space_range(Space); 127 Space = isl_space_from_range(Space); 128 isl_map *Universe = isl_map_universe(Space); 129 Relation = isl_map_domain_product(Relation, Universe); 130 Accesses = isl_union_map_add_map(Accesses, Relation); 131 } 132 133 return Accesses; 134 } 135 136 /// Get the set of all read accesses, tagged with the access id. 137 /// 138 /// @see getTaggedAccesses 139 isl_union_map *getTaggedReads() { 140 return getTaggedAccesses(MemoryAccess::READ); 141 } 142 143 /// Get the set of all may (and must) accesses, tagged with the access id. 144 /// 145 /// @see getTaggedAccesses 146 isl_union_map *getTaggedMayWrites() { 147 return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 148 getTaggedAccesses(MemoryAccess::MUST_WRITE)); 149 } 150 151 /// Get the set of all must accesses, tagged with the access id. 152 /// 153 /// @see getTaggedAccesses 154 isl_union_map *getTaggedMustWrites() { 155 return getTaggedAccesses(MemoryAccess::MUST_WRITE); 156 } 157 158 /// Collect parameter and array names as isl_ids. 159 /// 160 /// To reason about the different parameters and arrays used, ppcg requires 161 /// a list of all isl_ids in use. As PPCG traditionally performs 162 /// source-to-source compilation each of these isl_ids is mapped to the 163 /// expression that represents it. As we do not have a corresponding 164 /// expression in Polly, we just map each id to a 'zero' expression to match 165 /// the data format that ppcg expects. 166 /// 167 /// @returns Retun a map from collected ids to 'zero' ast expressions. 168 __isl_give isl_id_to_ast_expr *getNames() { 169 auto *Names = isl_id_to_ast_expr_alloc( 170 S->getIslCtx(), S->getNumParams() + std::distance(S->array_begin(), S->array_end())); 171 auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); 172 auto *Space = S->getParamSpace(); 173 174 for (int I = 0, E = S->getNumParams(); I < E; ++I) { 175 isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); 176 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 177 } 178 179 for (auto &Array : S->arrays()) { 180 auto Id = Array.second->getBasePtrId(); 181 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 182 } 183 184 isl_space_free(Space); 185 isl_ast_expr_free(Zero); 186 187 return Names; 188 } 189 190 /// Create a new PPCG scop from the current scop. 191 /// 192 /// The PPCG scop is initialized with data from the current polly::Scop. From 193 /// this initial data, the data-dependences in the PPCG scop are initialized. 194 /// We do not use Polly's dependence analysis for now, to ensure we match 195 /// the PPCG default behaviour more closely. 196 /// 197 /// @returns A new ppcg scop. 198 ppcg_scop *createPPCGScop() { 199 auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 200 201 PPCGScop->options = createPPCGOptions(); 202 203 PPCGScop->start = 0; 204 PPCGScop->end = 0; 205 206 PPCGScop->context = S->getContext(); 207 PPCGScop->domain = S->getDomains(); 208 PPCGScop->call = nullptr; 209 PPCGScop->tagged_reads = getTaggedReads(); 210 PPCGScop->reads = S->getReads(); 211 PPCGScop->live_in = nullptr; 212 PPCGScop->tagged_may_writes = getTaggedMayWrites(); 213 PPCGScop->may_writes = S->getWrites(); 214 PPCGScop->tagged_must_writes = getTaggedMustWrites(); 215 PPCGScop->must_writes = S->getMustWrites(); 216 PPCGScop->live_out = nullptr; 217 PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 218 PPCGScop->tagger = nullptr; 219 220 PPCGScop->independence = nullptr; 221 PPCGScop->dep_flow = nullptr; 222 PPCGScop->tagged_dep_flow = nullptr; 223 PPCGScop->dep_false = nullptr; 224 PPCGScop->dep_forced = nullptr; 225 PPCGScop->dep_order = nullptr; 226 PPCGScop->tagged_dep_order = nullptr; 227 228 PPCGScop->schedule = S->getScheduleTree(); 229 PPCGScop->names = getNames(); 230 231 PPCGScop->pet = nullptr; 232 233 compute_tagger(PPCGScop); 234 compute_dependences(PPCGScop); 235 236 return PPCGScop; 237 } 238 239 /// Create a default-initialized PPCG GPU program. 240 /// 241 /// @returns A new gpu grogram description. 242 gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 243 244 if (!PPCGScop) 245 return nullptr; 246 247 auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 248 249 PPCGProg->ctx = S->getIslCtx(); 250 PPCGProg->scop = PPCGScop; 251 PPCGProg->context = isl_set_copy(PPCGScop->context); 252 PPCGProg->read = nullptr; 253 PPCGProg->may_write = nullptr; 254 PPCGProg->must_write = nullptr; 255 PPCGProg->tagged_must_kill = nullptr; 256 PPCGProg->may_persist = nullptr; 257 PPCGProg->to_outer = nullptr; 258 PPCGProg->to_inner = nullptr; 259 PPCGProg->any_to_outer = nullptr; 260 PPCGProg->array_order = nullptr; 261 PPCGProg->n_stmts = 0; 262 PPCGProg->stmts = nullptr; 263 PPCGProg->n_array = 0; 264 PPCGProg->array = nullptr; 265 266 return PPCGProg; 267 } 268 269 // Generate a GPU program using PPCG. 270 // 271 // GPU mapping consists of multiple steps: 272 // 273 // 1) Compute new schedule for the program. 274 // 2) Map schedule to GPU (TODO) 275 // 3) Generate code for new schedule (TODO) 276 // 277 // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 278 // is mostly CPU specific. Instead, we use PPCG's GPU code generation 279 // strategy directly from this pass. 280 gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 281 282 auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 283 284 PPCGGen->ctx = S->getIslCtx(); 285 PPCGGen->options = PPCGScop->options; 286 PPCGGen->print = nullptr; 287 PPCGGen->print_user = nullptr; 288 PPCGGen->prog = PPCGProg; 289 PPCGGen->tree = nullptr; 290 PPCGGen->types.n = 0; 291 PPCGGen->types.name = nullptr; 292 PPCGGen->sizes = nullptr; 293 PPCGGen->used_sizes = nullptr; 294 PPCGGen->kernel_id = 0; 295 296 // Set scheduling strategy to same strategy PPCG is using. 297 isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 298 isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 299 300 isl_schedule *Schedule = get_schedule(PPCGGen); 301 302 int has_permutable = has_any_permutable_node(Schedule); 303 304 if (!has_permutable || has_permutable < 0) 305 Schedule = isl_schedule_free(Schedule); 306 else 307 Schedule = map_to_device(PPCGGen, Schedule); 308 309 if (DumpSchedule) { 310 isl_printer *P = isl_printer_to_str(S->getIslCtx()); 311 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 312 P = isl_printer_print_str(P, "Schedule\n"); 313 P = isl_printer_print_str(P, "========\n"); 314 if (Schedule) 315 P = isl_printer_print_schedule(P, Schedule); 316 else 317 P = isl_printer_print_str(P, "No schedule found\n"); 318 319 printf("%s\n", isl_printer_get_str(P)); 320 isl_printer_free(P); 321 } 322 323 isl_schedule_free(Schedule); 324 325 return PPCGGen; 326 } 327 328 /// Free gpu_gen structure. 329 /// 330 /// @param PPCGGen The ppcg_gen object to free. 331 void freePPCGGen(gpu_gen *PPCGGen) { 332 isl_ast_node_free(PPCGGen->tree); 333 isl_union_map_free(PPCGGen->sizes); 334 isl_union_map_free(PPCGGen->used_sizes); 335 free(PPCGGen); 336 } 337 338 bool runOnScop(Scop &CurrentScop) override { 339 S = &CurrentScop; 340 341 auto PPCGScop = createPPCGScop(); 342 auto PPCGProg = createPPCGProg(PPCGScop); 343 auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 344 freePPCGGen(PPCGGen); 345 gpu_prog_free(PPCGProg); 346 ppcg_scop_free(PPCGScop); 347 348 return true; 349 } 350 351 void printScop(raw_ostream &, Scop &) const override {} 352 353 void getAnalysisUsage(AnalysisUsage &AU) const override { 354 AU.addRequired<DominatorTreeWrapperPass>(); 355 AU.addRequired<RegionInfoPass>(); 356 AU.addRequired<ScalarEvolutionWrapperPass>(); 357 AU.addRequired<ScopDetection>(); 358 AU.addRequired<ScopInfoRegionPass>(); 359 AU.addRequired<LoopInfoWrapperPass>(); 360 361 AU.addPreserved<AAResultsWrapperPass>(); 362 AU.addPreserved<BasicAAWrapperPass>(); 363 AU.addPreserved<LoopInfoWrapperPass>(); 364 AU.addPreserved<DominatorTreeWrapperPass>(); 365 AU.addPreserved<GlobalsAAWrapperPass>(); 366 AU.addPreserved<PostDominatorTreeWrapperPass>(); 367 AU.addPreserved<ScopDetection>(); 368 AU.addPreserved<ScalarEvolutionWrapperPass>(); 369 AU.addPreserved<SCEVAAWrapperPass>(); 370 371 // FIXME: We do not yet add regions for the newly generated code to the 372 // region tree. 373 AU.addPreserved<RegionInfoPass>(); 374 AU.addPreserved<ScopInfoRegionPass>(); 375 } 376 }; 377 } 378 379 char PPCGCodeGeneration::ID = 1; 380 381 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 382 383 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 384 "Polly - Apply PPCG translation to SCOP", false, false) 385 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 386 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 387 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 388 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 389 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 390 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 391 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 392 "Polly - Apply PPCG translation to SCOP", false, false) 393