19dfe4e7cSTobias Grosser //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 29dfe4e7cSTobias Grosser // 39dfe4e7cSTobias Grosser // The LLVM Compiler Infrastructure 49dfe4e7cSTobias Grosser // 59dfe4e7cSTobias Grosser // This file is distributed under the University of Illinois Open Source 69dfe4e7cSTobias Grosser // License. See LICENSE.TXT for details. 79dfe4e7cSTobias Grosser // 89dfe4e7cSTobias Grosser //===----------------------------------------------------------------------===// 99dfe4e7cSTobias Grosser // 109dfe4e7cSTobias Grosser // Take a scop created by ScopInfo and map it to GPU code using the ppcg 119dfe4e7cSTobias Grosser // GPU mapping strategy. 129dfe4e7cSTobias Grosser // 139dfe4e7cSTobias Grosser //===----------------------------------------------------------------------===// 149dfe4e7cSTobias Grosser 159dfe4e7cSTobias Grosser #include "polly/CodeGen/IslNodeBuilder.h" 169dfe4e7cSTobias Grosser #include "polly/DependenceInfo.h" 179dfe4e7cSTobias Grosser #include "polly/LinkAllPasses.h" 18f384594dSTobias Grosser #include "polly/Options.h" 199dfe4e7cSTobias Grosser #include "polly/ScopInfo.h" 209dfe4e7cSTobias Grosser #include "llvm/Analysis/AliasAnalysis.h" 219dfe4e7cSTobias Grosser #include "llvm/Analysis/BasicAliasAnalysis.h" 229dfe4e7cSTobias Grosser #include "llvm/Analysis/GlobalsModRef.h" 239dfe4e7cSTobias Grosser #include "llvm/Analysis/PostDominators.h" 249dfe4e7cSTobias Grosser #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 259dfe4e7cSTobias Grosser 26f384594dSTobias Grosser #include "isl/union_map.h" 27f384594dSTobias Grosser 28e938517eSTobias Grosser extern "C" { 29e938517eSTobias Grosser #include "gpu.h" 30e938517eSTobias Grosser #include "ppcg.h" 31e938517eSTobias Grosser } 32e938517eSTobias Grosser 339dfe4e7cSTobias Grosser #include "llvm/Support/Debug.h" 349dfe4e7cSTobias Grosser 359dfe4e7cSTobias Grosser using namespace polly; 369dfe4e7cSTobias Grosser using namespace llvm; 379dfe4e7cSTobias Grosser 389dfe4e7cSTobias Grosser #define DEBUG_TYPE "polly-codegen-ppcg" 399dfe4e7cSTobias Grosser 40f384594dSTobias Grosser static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 41f384594dSTobias Grosser cl::desc("Dump the computed GPU Schedule"), 42*681bd568STobias Grosser cl::Hidden, cl::init(false), cl::ZeroOrMore, 43f384594dSTobias Grosser cl::cat(PollyCategory)); 44f384594dSTobias Grosser 459dfe4e7cSTobias Grosser namespace { 469dfe4e7cSTobias Grosser class PPCGCodeGeneration : public ScopPass { 479dfe4e7cSTobias Grosser public: 489dfe4e7cSTobias Grosser static char ID; 499dfe4e7cSTobias Grosser 50e938517eSTobias Grosser /// The scop that is currently processed. 51e938517eSTobias Grosser Scop *S; 52e938517eSTobias Grosser 539dfe4e7cSTobias Grosser PPCGCodeGeneration() : ScopPass(ID) {} 549dfe4e7cSTobias Grosser 55e938517eSTobias Grosser /// Construct compilation options for PPCG. 56e938517eSTobias Grosser /// 57e938517eSTobias Grosser /// @returns The compilation options. 58e938517eSTobias Grosser ppcg_options *createPPCGOptions() { 59e938517eSTobias Grosser auto DebugOptions = 60e938517eSTobias Grosser (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 61e938517eSTobias Grosser auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 62e938517eSTobias Grosser 63e938517eSTobias Grosser DebugOptions->dump_schedule_constraints = false; 64e938517eSTobias Grosser DebugOptions->dump_schedule = false; 65e938517eSTobias Grosser DebugOptions->dump_final_schedule = false; 66e938517eSTobias Grosser DebugOptions->dump_sizes = false; 67e938517eSTobias Grosser 68e938517eSTobias Grosser Options->debug = DebugOptions; 69e938517eSTobias Grosser 70e938517eSTobias Grosser Options->reschedule = true; 71e938517eSTobias Grosser Options->scale_tile_loops = false; 72e938517eSTobias Grosser Options->wrap = false; 73e938517eSTobias Grosser 74e938517eSTobias Grosser Options->non_negative_parameters = false; 75e938517eSTobias Grosser Options->ctx = nullptr; 76e938517eSTobias Grosser Options->sizes = nullptr; 77e938517eSTobias Grosser 78e938517eSTobias Grosser Options->use_private_memory = false; 79e938517eSTobias Grosser Options->use_shared_memory = false; 80e938517eSTobias Grosser Options->max_shared_memory = 0; 81e938517eSTobias Grosser 82e938517eSTobias Grosser Options->target = PPCG_TARGET_CUDA; 83e938517eSTobias Grosser Options->openmp = false; 84e938517eSTobias Grosser Options->linearize_device_arrays = true; 85e938517eSTobias Grosser Options->live_range_reordering = false; 86e938517eSTobias Grosser 87e938517eSTobias Grosser Options->opencl_compiler_options = nullptr; 88e938517eSTobias Grosser Options->opencl_use_gpu = false; 89e938517eSTobias Grosser Options->opencl_n_include_file = 0; 90e938517eSTobias Grosser Options->opencl_include_files = nullptr; 91e938517eSTobias Grosser Options->opencl_print_kernel_types = false; 92e938517eSTobias Grosser Options->opencl_embed_kernel_code = false; 93e938517eSTobias Grosser 94e938517eSTobias Grosser Options->save_schedule_file = nullptr; 95e938517eSTobias Grosser Options->load_schedule_file = nullptr; 96e938517eSTobias Grosser 97e938517eSTobias Grosser return Options; 98e938517eSTobias Grosser } 99e938517eSTobias Grosser 100f384594dSTobias Grosser /// Get a tagged access relation containing all accesses of type @p AccessTy. 101f384594dSTobias Grosser /// 102f384594dSTobias Grosser /// Instead of a normal access of the form: 103f384594dSTobias Grosser /// 104f384594dSTobias Grosser /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 105f384594dSTobias Grosser /// 106f384594dSTobias Grosser /// a tagged access has the form 107f384594dSTobias Grosser /// 108f384594dSTobias Grosser /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 109f384594dSTobias Grosser /// 110f384594dSTobias Grosser /// where 'id' is an additional space that references the memory access that 111f384594dSTobias Grosser /// triggered the access. 112f384594dSTobias Grosser /// 113f384594dSTobias Grosser /// @param AccessTy The type of the memory accesses to collect. 114f384594dSTobias Grosser /// 115f384594dSTobias Grosser /// @return The relation describing all tagged memory accesses. 116f384594dSTobias Grosser isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 117f384594dSTobias Grosser isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 118f384594dSTobias Grosser 119f384594dSTobias Grosser for (auto &Stmt : *S) 120f384594dSTobias Grosser for (auto &Acc : Stmt) 121f384594dSTobias Grosser if (Acc->getType() == AccessTy) { 122f384594dSTobias Grosser isl_map *Relation = Acc->getAccessRelation(); 123f384594dSTobias Grosser Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 124f384594dSTobias Grosser 125f384594dSTobias Grosser isl_space *Space = isl_map_get_space(Relation); 126f384594dSTobias Grosser Space = isl_space_range(Space); 127f384594dSTobias Grosser Space = isl_space_from_range(Space); 128f384594dSTobias Grosser isl_map *Universe = isl_map_universe(Space); 129f384594dSTobias Grosser Relation = isl_map_domain_product(Relation, Universe); 130f384594dSTobias Grosser Accesses = isl_union_map_add_map(Accesses, Relation); 131f384594dSTobias Grosser } 132f384594dSTobias Grosser 133f384594dSTobias Grosser return Accesses; 134f384594dSTobias Grosser } 135f384594dSTobias Grosser 136f384594dSTobias Grosser /// Get the set of all read accesses, tagged with the access id. 137f384594dSTobias Grosser /// 138f384594dSTobias Grosser /// @see getTaggedAccesses 139f384594dSTobias Grosser isl_union_map *getTaggedReads() { 140f384594dSTobias Grosser return getTaggedAccesses(MemoryAccess::READ); 141f384594dSTobias Grosser } 142f384594dSTobias Grosser 143f384594dSTobias Grosser /// Get the set of all may (and must) accesses, tagged with the access id. 144f384594dSTobias Grosser /// 145f384594dSTobias Grosser /// @see getTaggedAccesses 146f384594dSTobias Grosser isl_union_map *getTaggedMayWrites() { 147f384594dSTobias Grosser return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 148f384594dSTobias Grosser getTaggedAccesses(MemoryAccess::MUST_WRITE)); 149f384594dSTobias Grosser } 150f384594dSTobias Grosser 151f384594dSTobias Grosser /// Get the set of all must accesses, tagged with the access id. 152f384594dSTobias Grosser /// 153f384594dSTobias Grosser /// @see getTaggedAccesses 154f384594dSTobias Grosser isl_union_map *getTaggedMustWrites() { 155f384594dSTobias Grosser return getTaggedAccesses(MemoryAccess::MUST_WRITE); 156f384594dSTobias Grosser } 157f384594dSTobias Grosser 158e938517eSTobias Grosser /// Create a new PPCG scop from the current scop. 159e938517eSTobias Grosser /// 160f384594dSTobias Grosser /// The PPCG scop is initialized with data from the current polly::Scop. From 161f384594dSTobias Grosser /// this initial data, the data-dependences in the PPCG scop are initialized. 162f384594dSTobias Grosser /// We do not use Polly's dependence analysis for now, to ensure we match 163f384594dSTobias Grosser /// the PPCG default behaviour more closely. 164e938517eSTobias Grosser /// 165e938517eSTobias Grosser /// @returns A new ppcg scop. 166e938517eSTobias Grosser ppcg_scop *createPPCGScop() { 167e938517eSTobias Grosser auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 168e938517eSTobias Grosser 169e938517eSTobias Grosser PPCGScop->options = createPPCGOptions(); 170e938517eSTobias Grosser 171e938517eSTobias Grosser PPCGScop->start = 0; 172e938517eSTobias Grosser PPCGScop->end = 0; 173e938517eSTobias Grosser 174f384594dSTobias Grosser PPCGScop->context = S->getContext(); 175f384594dSTobias Grosser PPCGScop->domain = S->getDomains(); 176e938517eSTobias Grosser PPCGScop->call = nullptr; 177f384594dSTobias Grosser PPCGScop->tagged_reads = getTaggedReads(); 178f384594dSTobias Grosser PPCGScop->reads = S->getReads(); 179e938517eSTobias Grosser PPCGScop->live_in = nullptr; 180f384594dSTobias Grosser PPCGScop->tagged_may_writes = getTaggedMayWrites(); 181f384594dSTobias Grosser PPCGScop->may_writes = S->getWrites(); 182f384594dSTobias Grosser PPCGScop->tagged_must_writes = getTaggedMustWrites(); 183f384594dSTobias Grosser PPCGScop->must_writes = S->getMustWrites(); 184e938517eSTobias Grosser PPCGScop->live_out = nullptr; 185f384594dSTobias Grosser PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 186e938517eSTobias Grosser PPCGScop->tagger = nullptr; 187e938517eSTobias Grosser 188e938517eSTobias Grosser PPCGScop->independence = nullptr; 189e938517eSTobias Grosser PPCGScop->dep_flow = nullptr; 190e938517eSTobias Grosser PPCGScop->tagged_dep_flow = nullptr; 191e938517eSTobias Grosser PPCGScop->dep_false = nullptr; 192e938517eSTobias Grosser PPCGScop->dep_forced = nullptr; 193e938517eSTobias Grosser PPCGScop->dep_order = nullptr; 194e938517eSTobias Grosser PPCGScop->tagged_dep_order = nullptr; 195e938517eSTobias Grosser 196f384594dSTobias Grosser PPCGScop->schedule = S->getScheduleTree(); 197e938517eSTobias Grosser PPCGScop->names = nullptr; 198e938517eSTobias Grosser 199e938517eSTobias Grosser PPCGScop->pet = nullptr; 200e938517eSTobias Grosser 201f384594dSTobias Grosser compute_tagger(PPCGScop); 202f384594dSTobias Grosser compute_dependences(PPCGScop); 203f384594dSTobias Grosser 204e938517eSTobias Grosser return PPCGScop; 205e938517eSTobias Grosser } 206e938517eSTobias Grosser 207e938517eSTobias Grosser /// Create a default-initialized PPCG GPU program. 208e938517eSTobias Grosser /// 209e938517eSTobias Grosser /// @returns A new gpu grogram description. 210e938517eSTobias Grosser gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 211e938517eSTobias Grosser 212e938517eSTobias Grosser if (!PPCGScop) 213e938517eSTobias Grosser return nullptr; 214e938517eSTobias Grosser 215e938517eSTobias Grosser auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 216e938517eSTobias Grosser 217e938517eSTobias Grosser PPCGProg->ctx = S->getIslCtx(); 218e938517eSTobias Grosser PPCGProg->scop = PPCGScop; 219e938517eSTobias Grosser PPCGProg->context = nullptr; 220e938517eSTobias Grosser PPCGProg->read = nullptr; 221e938517eSTobias Grosser PPCGProg->may_write = nullptr; 222e938517eSTobias Grosser PPCGProg->must_write = nullptr; 223e938517eSTobias Grosser PPCGProg->tagged_must_kill = nullptr; 224e938517eSTobias Grosser PPCGProg->may_persist = nullptr; 225e938517eSTobias Grosser PPCGProg->to_outer = nullptr; 226e938517eSTobias Grosser PPCGProg->to_inner = nullptr; 227e938517eSTobias Grosser PPCGProg->any_to_outer = nullptr; 228e938517eSTobias Grosser PPCGProg->array_order = nullptr; 229e938517eSTobias Grosser PPCGProg->n_stmts = 0; 230e938517eSTobias Grosser PPCGProg->stmts = nullptr; 231e938517eSTobias Grosser PPCGProg->n_array = 0; 232e938517eSTobias Grosser PPCGProg->array = nullptr; 233e938517eSTobias Grosser 234e938517eSTobias Grosser return PPCGProg; 235e938517eSTobias Grosser } 236e938517eSTobias Grosser 237f384594dSTobias Grosser // Generate a GPU program using PPCG. 238f384594dSTobias Grosser // 239f384594dSTobias Grosser // GPU mapping consists of multiple steps: 240f384594dSTobias Grosser // 241f384594dSTobias Grosser // 1) Compute new schedule for the program. 242f384594dSTobias Grosser // 2) Map schedule to GPU (TODO) 243f384594dSTobias Grosser // 3) Generate code for new schedule (TODO) 244f384594dSTobias Grosser // 245f384594dSTobias Grosser // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 246f384594dSTobias Grosser // is mostly CPU specific. Instead, we use PPCG's GPU code generation 247f384594dSTobias Grosser // strategy directly from this pass. 248f384594dSTobias Grosser gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 249f384594dSTobias Grosser 250f384594dSTobias Grosser auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 251f384594dSTobias Grosser 252f384594dSTobias Grosser PPCGGen->ctx = S->getIslCtx(); 253f384594dSTobias Grosser PPCGGen->options = PPCGScop->options; 254f384594dSTobias Grosser PPCGGen->print = nullptr; 255f384594dSTobias Grosser PPCGGen->print_user = nullptr; 256f384594dSTobias Grosser PPCGGen->prog = PPCGProg; 257f384594dSTobias Grosser PPCGGen->tree = nullptr; 258f384594dSTobias Grosser PPCGGen->types.n = 0; 259f384594dSTobias Grosser PPCGGen->types.name = nullptr; 260f384594dSTobias Grosser PPCGGen->sizes = nullptr; 261f384594dSTobias Grosser PPCGGen->used_sizes = nullptr; 262f384594dSTobias Grosser PPCGGen->kernel_id = 0; 263f384594dSTobias Grosser 264f384594dSTobias Grosser // Set scheduling strategy to same strategy PPCG is using. 265f384594dSTobias Grosser isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 266f384594dSTobias Grosser isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 267f384594dSTobias Grosser 268f384594dSTobias Grosser isl_schedule *Schedule = get_schedule(PPCGGen); 269f384594dSTobias Grosser 270f384594dSTobias Grosser if (DumpSchedule) { 271f384594dSTobias Grosser isl_printer *P = isl_printer_to_str(S->getIslCtx()); 272f384594dSTobias Grosser P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 273f384594dSTobias Grosser P = isl_printer_print_str(P, "Schedule\n"); 274f384594dSTobias Grosser P = isl_printer_print_str(P, "========\n"); 275f384594dSTobias Grosser if (Schedule) 276f384594dSTobias Grosser P = isl_printer_print_schedule(P, Schedule); 277f384594dSTobias Grosser else 278f384594dSTobias Grosser P = isl_printer_print_str(P, "No schedule found\n"); 279f384594dSTobias Grosser 280f384594dSTobias Grosser printf("%s\n", isl_printer_get_str(P)); 281f384594dSTobias Grosser isl_printer_free(P); 282f384594dSTobias Grosser } 283f384594dSTobias Grosser 284f384594dSTobias Grosser isl_schedule_free(Schedule); 285f384594dSTobias Grosser 286f384594dSTobias Grosser return PPCGGen; 287f384594dSTobias Grosser } 288f384594dSTobias Grosser 289f384594dSTobias Grosser /// Free gpu_gen structure. 290f384594dSTobias Grosser /// 291f384594dSTobias Grosser /// @param PPCGGen The ppcg_gen object to free. 292f384594dSTobias Grosser void freePPCGGen(gpu_gen *PPCGGen) { 293f384594dSTobias Grosser isl_ast_node_free(PPCGGen->tree); 294f384594dSTobias Grosser isl_union_map_free(PPCGGen->sizes); 295f384594dSTobias Grosser isl_union_map_free(PPCGGen->used_sizes); 296f384594dSTobias Grosser free(PPCGGen); 297f384594dSTobias Grosser } 298f384594dSTobias Grosser 299e938517eSTobias Grosser bool runOnScop(Scop &CurrentScop) override { 300e938517eSTobias Grosser S = &CurrentScop; 301e938517eSTobias Grosser 302e938517eSTobias Grosser auto PPCGScop = createPPCGScop(); 303e938517eSTobias Grosser auto PPCGProg = createPPCGProg(PPCGScop); 304f384594dSTobias Grosser auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 305f384594dSTobias Grosser freePPCGGen(PPCGGen); 306e938517eSTobias Grosser gpu_prog_free(PPCGProg); 307e938517eSTobias Grosser ppcg_scop_free(PPCGScop); 308e938517eSTobias Grosser 309e938517eSTobias Grosser return true; 310e938517eSTobias Grosser } 3119dfe4e7cSTobias Grosser 3129dfe4e7cSTobias Grosser void printScop(raw_ostream &, Scop &) const override {} 3139dfe4e7cSTobias Grosser 3149dfe4e7cSTobias Grosser void getAnalysisUsage(AnalysisUsage &AU) const override { 3159dfe4e7cSTobias Grosser AU.addRequired<DominatorTreeWrapperPass>(); 3169dfe4e7cSTobias Grosser AU.addRequired<RegionInfoPass>(); 3179dfe4e7cSTobias Grosser AU.addRequired<ScalarEvolutionWrapperPass>(); 3189dfe4e7cSTobias Grosser AU.addRequired<ScopDetection>(); 3199dfe4e7cSTobias Grosser AU.addRequired<ScopInfoRegionPass>(); 3209dfe4e7cSTobias Grosser AU.addRequired<LoopInfoWrapperPass>(); 3219dfe4e7cSTobias Grosser 3229dfe4e7cSTobias Grosser AU.addPreserved<AAResultsWrapperPass>(); 3239dfe4e7cSTobias Grosser AU.addPreserved<BasicAAWrapperPass>(); 3249dfe4e7cSTobias Grosser AU.addPreserved<LoopInfoWrapperPass>(); 3259dfe4e7cSTobias Grosser AU.addPreserved<DominatorTreeWrapperPass>(); 3269dfe4e7cSTobias Grosser AU.addPreserved<GlobalsAAWrapperPass>(); 3279dfe4e7cSTobias Grosser AU.addPreserved<PostDominatorTreeWrapperPass>(); 3289dfe4e7cSTobias Grosser AU.addPreserved<ScopDetection>(); 3299dfe4e7cSTobias Grosser AU.addPreserved<ScalarEvolutionWrapperPass>(); 3309dfe4e7cSTobias Grosser AU.addPreserved<SCEVAAWrapperPass>(); 3319dfe4e7cSTobias Grosser 3329dfe4e7cSTobias Grosser // FIXME: We do not yet add regions for the newly generated code to the 3339dfe4e7cSTobias Grosser // region tree. 3349dfe4e7cSTobias Grosser AU.addPreserved<RegionInfoPass>(); 3359dfe4e7cSTobias Grosser AU.addPreserved<ScopInfoRegionPass>(); 3369dfe4e7cSTobias Grosser } 3379dfe4e7cSTobias Grosser }; 3389dfe4e7cSTobias Grosser } 3399dfe4e7cSTobias Grosser 3409dfe4e7cSTobias Grosser char PPCGCodeGeneration::ID = 1; 3419dfe4e7cSTobias Grosser 3429dfe4e7cSTobias Grosser Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 3439dfe4e7cSTobias Grosser 3449dfe4e7cSTobias Grosser INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 3459dfe4e7cSTobias Grosser "Polly - Apply PPCG translation to SCOP", false, false) 3469dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 3479dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 3489dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 3499dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 3509dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 3519dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(ScopDetection); 3529dfe4e7cSTobias Grosser INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 3539dfe4e7cSTobias Grosser "Polly - Apply PPCG translation to SCOP", false, false) 354