1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg 11 // GPU mapping strategy. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/DependenceInfo.h" 17 #include "polly/LinkAllPasses.h" 18 #include "polly/Options.h" 19 #include "polly/ScopInfo.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BasicAliasAnalysis.h" 22 #include "llvm/Analysis/GlobalsModRef.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 26 #include "isl/union_map.h" 27 28 extern "C" { 29 #include "gpu.h" 30 #include "ppcg.h" 31 } 32 33 #include "llvm/Support/Debug.h" 34 35 using namespace polly; 36 using namespace llvm; 37 38 #define DEBUG_TYPE "polly-codegen-ppcg" 39 40 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41 cl::desc("Dump the computed GPU Schedule"), 42 cl::Hidden, cl::init(false), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 45 namespace { 46 class PPCGCodeGeneration : public ScopPass { 47 public: 48 static char ID; 49 50 /// The scop that is currently processed. 51 Scop *S; 52 53 PPCGCodeGeneration() : ScopPass(ID) {} 54 55 /// Construct compilation options for PPCG. 56 /// 57 /// @returns The compilation options. 58 ppcg_options *createPPCGOptions() { 59 auto DebugOptions = 60 (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 61 auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 62 63 DebugOptions->dump_schedule_constraints = false; 64 DebugOptions->dump_schedule = false; 65 DebugOptions->dump_final_schedule = false; 66 DebugOptions->dump_sizes = false; 67 68 Options->debug = DebugOptions; 69 70 Options->reschedule = true; 71 Options->scale_tile_loops = false; 72 Options->wrap = false; 73 74 Options->non_negative_parameters = false; 75 Options->ctx = nullptr; 76 Options->sizes = nullptr; 77 78 Options->use_private_memory = false; 79 Options->use_shared_memory = false; 80 Options->max_shared_memory = 0; 81 82 Options->target = PPCG_TARGET_CUDA; 83 Options->openmp = false; 84 Options->linearize_device_arrays = true; 85 Options->live_range_reordering = false; 86 87 Options->opencl_compiler_options = nullptr; 88 Options->opencl_use_gpu = false; 89 Options->opencl_n_include_file = 0; 90 Options->opencl_include_files = nullptr; 91 Options->opencl_print_kernel_types = false; 92 Options->opencl_embed_kernel_code = false; 93 94 Options->save_schedule_file = nullptr; 95 Options->load_schedule_file = nullptr; 96 97 return Options; 98 } 99 100 /// Get a tagged access relation containing all accesses of type @p AccessTy. 101 /// 102 /// Instead of a normal access of the form: 103 /// 104 /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 105 /// 106 /// a tagged access has the form 107 /// 108 /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 109 /// 110 /// where 'id' is an additional space that references the memory access that 111 /// triggered the access. 112 /// 113 /// @param AccessTy The type of the memory accesses to collect. 114 /// 115 /// @return The relation describing all tagged memory accesses. 116 isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 117 isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 118 119 for (auto &Stmt : *S) 120 for (auto &Acc : Stmt) 121 if (Acc->getType() == AccessTy) { 122 isl_map *Relation = Acc->getAccessRelation(); 123 Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 124 125 isl_space *Space = isl_map_get_space(Relation); 126 Space = isl_space_range(Space); 127 Space = isl_space_from_range(Space); 128 isl_map *Universe = isl_map_universe(Space); 129 Relation = isl_map_domain_product(Relation, Universe); 130 Accesses = isl_union_map_add_map(Accesses, Relation); 131 } 132 133 return Accesses; 134 } 135 136 /// Get the set of all read accesses, tagged with the access id. 137 /// 138 /// @see getTaggedAccesses 139 isl_union_map *getTaggedReads() { 140 return getTaggedAccesses(MemoryAccess::READ); 141 } 142 143 /// Get the set of all may (and must) accesses, tagged with the access id. 144 /// 145 /// @see getTaggedAccesses 146 isl_union_map *getTaggedMayWrites() { 147 return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 148 getTaggedAccesses(MemoryAccess::MUST_WRITE)); 149 } 150 151 /// Get the set of all must accesses, tagged with the access id. 152 /// 153 /// @see getTaggedAccesses 154 isl_union_map *getTaggedMustWrites() { 155 return getTaggedAccesses(MemoryAccess::MUST_WRITE); 156 } 157 158 /// Collect parameter and array names as isl_ids. 159 /// 160 /// To reason about the different parameters and arrays used, ppcg requires 161 /// a list of all isl_ids in use. As PPCG traditionally performs 162 /// source-to-source compilation each of these isl_ids is mapped to the 163 /// expression that represents it. As we do not have a corresponding 164 /// expression in Polly, we just map each id to a 'zero' expression to match 165 /// the data format that ppcg expects. 166 /// 167 /// @returns Retun a map from collected ids to 'zero' ast expressions. 168 __isl_give isl_id_to_ast_expr *getNames() { 169 auto *Names = isl_id_to_ast_expr_alloc( 170 S->getIslCtx(), 171 S->getNumParams() + std::distance(S->array_begin(), S->array_end())); 172 auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); 173 auto *Space = S->getParamSpace(); 174 175 for (int I = 0, E = S->getNumParams(); I < E; ++I) { 176 isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); 177 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 178 } 179 180 for (auto &Array : S->arrays()) { 181 auto Id = Array.second->getBasePtrId(); 182 Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 183 } 184 185 isl_space_free(Space); 186 isl_ast_expr_free(Zero); 187 188 return Names; 189 } 190 191 /// Create a new PPCG scop from the current scop. 192 /// 193 /// The PPCG scop is initialized with data from the current polly::Scop. From 194 /// this initial data, the data-dependences in the PPCG scop are initialized. 195 /// We do not use Polly's dependence analysis for now, to ensure we match 196 /// the PPCG default behaviour more closely. 197 /// 198 /// @returns A new ppcg scop. 199 ppcg_scop *createPPCGScop() { 200 auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 201 202 PPCGScop->options = createPPCGOptions(); 203 204 PPCGScop->start = 0; 205 PPCGScop->end = 0; 206 207 PPCGScop->context = S->getContext(); 208 PPCGScop->domain = S->getDomains(); 209 PPCGScop->call = nullptr; 210 PPCGScop->tagged_reads = getTaggedReads(); 211 PPCGScop->reads = S->getReads(); 212 PPCGScop->live_in = nullptr; 213 PPCGScop->tagged_may_writes = getTaggedMayWrites(); 214 PPCGScop->may_writes = S->getWrites(); 215 PPCGScop->tagged_must_writes = getTaggedMustWrites(); 216 PPCGScop->must_writes = S->getMustWrites(); 217 PPCGScop->live_out = nullptr; 218 PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 219 PPCGScop->tagger = nullptr; 220 221 PPCGScop->independence = nullptr; 222 PPCGScop->dep_flow = nullptr; 223 PPCGScop->tagged_dep_flow = nullptr; 224 PPCGScop->dep_false = nullptr; 225 PPCGScop->dep_forced = nullptr; 226 PPCGScop->dep_order = nullptr; 227 PPCGScop->tagged_dep_order = nullptr; 228 229 PPCGScop->schedule = S->getScheduleTree(); 230 PPCGScop->names = getNames(); 231 232 PPCGScop->pet = nullptr; 233 234 compute_tagger(PPCGScop); 235 compute_dependences(PPCGScop); 236 237 return PPCGScop; 238 } 239 240 /// Create a default-initialized PPCG GPU program. 241 /// 242 /// @returns A new gpu grogram description. 243 gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 244 245 if (!PPCGScop) 246 return nullptr; 247 248 auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 249 250 PPCGProg->ctx = S->getIslCtx(); 251 PPCGProg->scop = PPCGScop; 252 PPCGProg->context = isl_set_copy(PPCGScop->context); 253 PPCGProg->read = nullptr; 254 PPCGProg->may_write = nullptr; 255 PPCGProg->must_write = nullptr; 256 PPCGProg->tagged_must_kill = nullptr; 257 PPCGProg->may_persist = nullptr; 258 PPCGProg->to_outer = nullptr; 259 PPCGProg->to_inner = nullptr; 260 PPCGProg->any_to_outer = nullptr; 261 PPCGProg->array_order = nullptr; 262 PPCGProg->n_stmts = 0; 263 PPCGProg->stmts = nullptr; 264 PPCGProg->n_array = 0; 265 PPCGProg->array = nullptr; 266 267 return PPCGProg; 268 } 269 270 // Generate a GPU program using PPCG. 271 // 272 // GPU mapping consists of multiple steps: 273 // 274 // 1) Compute new schedule for the program. 275 // 2) Map schedule to GPU (TODO) 276 // 3) Generate code for new schedule (TODO) 277 // 278 // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 279 // is mostly CPU specific. Instead, we use PPCG's GPU code generation 280 // strategy directly from this pass. 281 gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 282 283 auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 284 285 PPCGGen->ctx = S->getIslCtx(); 286 PPCGGen->options = PPCGScop->options; 287 PPCGGen->print = nullptr; 288 PPCGGen->print_user = nullptr; 289 PPCGGen->prog = PPCGProg; 290 PPCGGen->tree = nullptr; 291 PPCGGen->types.n = 0; 292 PPCGGen->types.name = nullptr; 293 PPCGGen->sizes = nullptr; 294 PPCGGen->used_sizes = nullptr; 295 PPCGGen->kernel_id = 0; 296 297 // Set scheduling strategy to same strategy PPCG is using. 298 isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 299 isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 300 301 isl_schedule *Schedule = get_schedule(PPCGGen); 302 303 int has_permutable = has_any_permutable_node(Schedule); 304 305 if (!has_permutable || has_permutable < 0) 306 Schedule = isl_schedule_free(Schedule); 307 else 308 Schedule = map_to_device(PPCGGen, Schedule); 309 310 if (DumpSchedule) { 311 isl_printer *P = isl_printer_to_str(S->getIslCtx()); 312 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 313 P = isl_printer_print_str(P, "Schedule\n"); 314 P = isl_printer_print_str(P, "========\n"); 315 if (Schedule) 316 P = isl_printer_print_schedule(P, Schedule); 317 else 318 P = isl_printer_print_str(P, "No schedule found\n"); 319 320 printf("%s\n", isl_printer_get_str(P)); 321 isl_printer_free(P); 322 } 323 324 isl_schedule_free(Schedule); 325 326 return PPCGGen; 327 } 328 329 /// Free gpu_gen structure. 330 /// 331 /// @param PPCGGen The ppcg_gen object to free. 332 void freePPCGGen(gpu_gen *PPCGGen) { 333 isl_ast_node_free(PPCGGen->tree); 334 isl_union_map_free(PPCGGen->sizes); 335 isl_union_map_free(PPCGGen->used_sizes); 336 free(PPCGGen); 337 } 338 339 bool runOnScop(Scop &CurrentScop) override { 340 S = &CurrentScop; 341 342 auto PPCGScop = createPPCGScop(); 343 auto PPCGProg = createPPCGProg(PPCGScop); 344 auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 345 freePPCGGen(PPCGGen); 346 gpu_prog_free(PPCGProg); 347 ppcg_scop_free(PPCGScop); 348 349 return true; 350 } 351 352 void printScop(raw_ostream &, Scop &) const override {} 353 354 void getAnalysisUsage(AnalysisUsage &AU) const override { 355 AU.addRequired<DominatorTreeWrapperPass>(); 356 AU.addRequired<RegionInfoPass>(); 357 AU.addRequired<ScalarEvolutionWrapperPass>(); 358 AU.addRequired<ScopDetection>(); 359 AU.addRequired<ScopInfoRegionPass>(); 360 AU.addRequired<LoopInfoWrapperPass>(); 361 362 AU.addPreserved<AAResultsWrapperPass>(); 363 AU.addPreserved<BasicAAWrapperPass>(); 364 AU.addPreserved<LoopInfoWrapperPass>(); 365 AU.addPreserved<DominatorTreeWrapperPass>(); 366 AU.addPreserved<GlobalsAAWrapperPass>(); 367 AU.addPreserved<PostDominatorTreeWrapperPass>(); 368 AU.addPreserved<ScopDetection>(); 369 AU.addPreserved<ScalarEvolutionWrapperPass>(); 370 AU.addPreserved<SCEVAAWrapperPass>(); 371 372 // FIXME: We do not yet add regions for the newly generated code to the 373 // region tree. 374 AU.addPreserved<RegionInfoPass>(); 375 AU.addPreserved<ScopInfoRegionPass>(); 376 } 377 }; 378 } 379 380 char PPCGCodeGeneration::ID = 1; 381 382 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 383 384 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 385 "Polly - Apply PPCG translation to SCOP", false, false) 386 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 387 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 388 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 389 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 390 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 391 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 392 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 393 "Polly - Apply PPCG translation to SCOP", false, false) 394