1 //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Take a scop created by ScopInfo and map it to GPU code using the ppcg 11 // GPU mapping strategy. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/CodeGen/IslNodeBuilder.h" 16 #include "polly/DependenceInfo.h" 17 #include "polly/LinkAllPasses.h" 18 #include "polly/Options.h" 19 #include "polly/ScopInfo.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BasicAliasAnalysis.h" 22 #include "llvm/Analysis/GlobalsModRef.h" 23 #include "llvm/Analysis/PostDominators.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 26 #include "isl/union_map.h" 27 28 extern "C" { 29 #include "gpu.h" 30 #include "ppcg.h" 31 } 32 33 #include "llvm/Support/Debug.h" 34 35 using namespace polly; 36 using namespace llvm; 37 38 #define DEBUG_TYPE "polly-codegen-ppcg" 39 40 static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41 cl::desc("Dump the computed GPU Schedule"), 42 cl::Hidden, cl::init(true), cl::ZeroOrMore, 43 cl::cat(PollyCategory)); 44 45 namespace { 46 class PPCGCodeGeneration : public ScopPass { 47 public: 48 static char ID; 49 50 /// The scop that is currently processed. 51 Scop *S; 52 53 PPCGCodeGeneration() : ScopPass(ID) {} 54 55 /// Construct compilation options for PPCG. 56 /// 57 /// @returns The compilation options. 58 ppcg_options *createPPCGOptions() { 59 auto DebugOptions = 60 (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 61 auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 62 63 DebugOptions->dump_schedule_constraints = false; 64 DebugOptions->dump_schedule = false; 65 DebugOptions->dump_final_schedule = false; 66 DebugOptions->dump_sizes = false; 67 68 Options->debug = DebugOptions; 69 70 Options->reschedule = true; 71 Options->scale_tile_loops = false; 72 Options->wrap = false; 73 74 Options->non_negative_parameters = false; 75 Options->ctx = nullptr; 76 Options->sizes = nullptr; 77 78 Options->use_private_memory = false; 79 Options->use_shared_memory = false; 80 Options->max_shared_memory = 0; 81 82 Options->target = PPCG_TARGET_CUDA; 83 Options->openmp = false; 84 Options->linearize_device_arrays = true; 85 Options->live_range_reordering = false; 86 87 Options->opencl_compiler_options = nullptr; 88 Options->opencl_use_gpu = false; 89 Options->opencl_n_include_file = 0; 90 Options->opencl_include_files = nullptr; 91 Options->opencl_print_kernel_types = false; 92 Options->opencl_embed_kernel_code = false; 93 94 Options->save_schedule_file = nullptr; 95 Options->load_schedule_file = nullptr; 96 97 return Options; 98 } 99 100 /// Get a tagged access relation containing all accesses of type @p AccessTy. 101 /// 102 /// Instead of a normal access of the form: 103 /// 104 /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 105 /// 106 /// a tagged access has the form 107 /// 108 /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 109 /// 110 /// where 'id' is an additional space that references the memory access that 111 /// triggered the access. 112 /// 113 /// @param AccessTy The type of the memory accesses to collect. 114 /// 115 /// @return The relation describing all tagged memory accesses. 116 isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 117 isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 118 119 for (auto &Stmt : *S) 120 for (auto &Acc : Stmt) 121 if (Acc->getType() == AccessTy) { 122 isl_map *Relation = Acc->getAccessRelation(); 123 Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 124 125 isl_space *Space = isl_map_get_space(Relation); 126 Space = isl_space_range(Space); 127 Space = isl_space_from_range(Space); 128 isl_map *Universe = isl_map_universe(Space); 129 Relation = isl_map_domain_product(Relation, Universe); 130 Accesses = isl_union_map_add_map(Accesses, Relation); 131 } 132 133 return Accesses; 134 } 135 136 /// Get the set of all read accesses, tagged with the access id. 137 /// 138 /// @see getTaggedAccesses 139 isl_union_map *getTaggedReads() { 140 return getTaggedAccesses(MemoryAccess::READ); 141 } 142 143 /// Get the set of all may (and must) accesses, tagged with the access id. 144 /// 145 /// @see getTaggedAccesses 146 isl_union_map *getTaggedMayWrites() { 147 return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 148 getTaggedAccesses(MemoryAccess::MUST_WRITE)); 149 } 150 151 /// Get the set of all must accesses, tagged with the access id. 152 /// 153 /// @see getTaggedAccesses 154 isl_union_map *getTaggedMustWrites() { 155 return getTaggedAccesses(MemoryAccess::MUST_WRITE); 156 } 157 158 /// Create a new PPCG scop from the current scop. 159 /// 160 /// The PPCG scop is initialized with data from the current polly::Scop. From 161 /// this initial data, the data-dependences in the PPCG scop are initialized. 162 /// We do not use Polly's dependence analysis for now, to ensure we match 163 /// the PPCG default behaviour more closely. 164 /// 165 /// @returns A new ppcg scop. 166 ppcg_scop *createPPCGScop() { 167 auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 168 169 PPCGScop->options = createPPCGOptions(); 170 171 PPCGScop->start = 0; 172 PPCGScop->end = 0; 173 174 PPCGScop->context = S->getContext(); 175 PPCGScop->domain = S->getDomains(); 176 PPCGScop->call = nullptr; 177 PPCGScop->tagged_reads = getTaggedReads(); 178 PPCGScop->reads = S->getReads(); 179 PPCGScop->live_in = nullptr; 180 PPCGScop->tagged_may_writes = getTaggedMayWrites(); 181 PPCGScop->may_writes = S->getWrites(); 182 PPCGScop->tagged_must_writes = getTaggedMustWrites(); 183 PPCGScop->must_writes = S->getMustWrites(); 184 PPCGScop->live_out = nullptr; 185 PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 186 PPCGScop->tagger = nullptr; 187 188 PPCGScop->independence = nullptr; 189 PPCGScop->dep_flow = nullptr; 190 PPCGScop->tagged_dep_flow = nullptr; 191 PPCGScop->dep_false = nullptr; 192 PPCGScop->dep_forced = nullptr; 193 PPCGScop->dep_order = nullptr; 194 PPCGScop->tagged_dep_order = nullptr; 195 196 PPCGScop->schedule = S->getScheduleTree(); 197 PPCGScop->names = nullptr; 198 199 PPCGScop->pet = nullptr; 200 201 compute_tagger(PPCGScop); 202 compute_dependences(PPCGScop); 203 204 return PPCGScop; 205 } 206 207 /// Create a default-initialized PPCG GPU program. 208 /// 209 /// @returns A new gpu grogram description. 210 gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 211 212 if (!PPCGScop) 213 return nullptr; 214 215 auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 216 217 PPCGProg->ctx = S->getIslCtx(); 218 PPCGProg->scop = PPCGScop; 219 PPCGProg->context = nullptr; 220 PPCGProg->read = nullptr; 221 PPCGProg->may_write = nullptr; 222 PPCGProg->must_write = nullptr; 223 PPCGProg->tagged_must_kill = nullptr; 224 PPCGProg->may_persist = nullptr; 225 PPCGProg->to_outer = nullptr; 226 PPCGProg->to_inner = nullptr; 227 PPCGProg->any_to_outer = nullptr; 228 PPCGProg->array_order = nullptr; 229 PPCGProg->n_stmts = 0; 230 PPCGProg->stmts = nullptr; 231 PPCGProg->n_array = 0; 232 PPCGProg->array = nullptr; 233 234 return PPCGProg; 235 } 236 237 // Generate a GPU program using PPCG. 238 // 239 // GPU mapping consists of multiple steps: 240 // 241 // 1) Compute new schedule for the program. 242 // 2) Map schedule to GPU (TODO) 243 // 3) Generate code for new schedule (TODO) 244 // 245 // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 246 // is mostly CPU specific. Instead, we use PPCG's GPU code generation 247 // strategy directly from this pass. 248 gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 249 250 auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 251 252 PPCGGen->ctx = S->getIslCtx(); 253 PPCGGen->options = PPCGScop->options; 254 PPCGGen->print = nullptr; 255 PPCGGen->print_user = nullptr; 256 PPCGGen->prog = PPCGProg; 257 PPCGGen->tree = nullptr; 258 PPCGGen->types.n = 0; 259 PPCGGen->types.name = nullptr; 260 PPCGGen->sizes = nullptr; 261 PPCGGen->used_sizes = nullptr; 262 PPCGGen->kernel_id = 0; 263 264 // Set scheduling strategy to same strategy PPCG is using. 265 isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 266 isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 267 268 isl_schedule *Schedule = get_schedule(PPCGGen); 269 270 if (DumpSchedule) { 271 isl_printer *P = isl_printer_to_str(S->getIslCtx()); 272 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 273 P = isl_printer_print_str(P, "Schedule\n"); 274 P = isl_printer_print_str(P, "========\n"); 275 if (Schedule) 276 P = isl_printer_print_schedule(P, Schedule); 277 else 278 P = isl_printer_print_str(P, "No schedule found\n"); 279 280 printf("%s\n", isl_printer_get_str(P)); 281 isl_printer_free(P); 282 } 283 284 isl_schedule_free(Schedule); 285 286 return PPCGGen; 287 } 288 289 /// Free gpu_gen structure. 290 /// 291 /// @param PPCGGen The ppcg_gen object to free. 292 void freePPCGGen(gpu_gen *PPCGGen) { 293 isl_ast_node_free(PPCGGen->tree); 294 isl_union_map_free(PPCGGen->sizes); 295 isl_union_map_free(PPCGGen->used_sizes); 296 free(PPCGGen); 297 } 298 299 bool runOnScop(Scop &CurrentScop) override { 300 S = &CurrentScop; 301 302 auto PPCGScop = createPPCGScop(); 303 auto PPCGProg = createPPCGProg(PPCGScop); 304 auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 305 freePPCGGen(PPCGGen); 306 gpu_prog_free(PPCGProg); 307 ppcg_scop_free(PPCGScop); 308 309 return true; 310 } 311 312 void printScop(raw_ostream &, Scop &) const override {} 313 314 void getAnalysisUsage(AnalysisUsage &AU) const override { 315 AU.addRequired<DominatorTreeWrapperPass>(); 316 AU.addRequired<RegionInfoPass>(); 317 AU.addRequired<ScalarEvolutionWrapperPass>(); 318 AU.addRequired<ScopDetection>(); 319 AU.addRequired<ScopInfoRegionPass>(); 320 AU.addRequired<LoopInfoWrapperPass>(); 321 322 AU.addPreserved<AAResultsWrapperPass>(); 323 AU.addPreserved<BasicAAWrapperPass>(); 324 AU.addPreserved<LoopInfoWrapperPass>(); 325 AU.addPreserved<DominatorTreeWrapperPass>(); 326 AU.addPreserved<GlobalsAAWrapperPass>(); 327 AU.addPreserved<PostDominatorTreeWrapperPass>(); 328 AU.addPreserved<ScopDetection>(); 329 AU.addPreserved<ScalarEvolutionWrapperPass>(); 330 AU.addPreserved<SCEVAAWrapperPass>(); 331 332 // FIXME: We do not yet add regions for the newly generated code to the 333 // region tree. 334 AU.addPreserved<RegionInfoPass>(); 335 AU.addPreserved<ScopInfoRegionPass>(); 336 } 337 }; 338 } 339 340 char PPCGCodeGeneration::ID = 1; 341 342 Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 343 344 INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 345 "Polly - Apply PPCG translation to SCOP", false, false) 346 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 347 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 348 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 349 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 350 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 351 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 352 INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 353 "Polly - Apply PPCG translation to SCOP", false, false) 354