19dfe4e7cSTobias Grosser //===------ PPCGCodeGeneration.cpp - Polly Accelerator Code Generation. ---===// 29dfe4e7cSTobias Grosser // 39dfe4e7cSTobias Grosser // The LLVM Compiler Infrastructure 49dfe4e7cSTobias Grosser // 59dfe4e7cSTobias Grosser // This file is distributed under the University of Illinois Open Source 69dfe4e7cSTobias Grosser // License. See LICENSE.TXT for details. 79dfe4e7cSTobias Grosser // 89dfe4e7cSTobias Grosser //===----------------------------------------------------------------------===// 99dfe4e7cSTobias Grosser // 109dfe4e7cSTobias Grosser // Take a scop created by ScopInfo and map it to GPU code using the ppcg 119dfe4e7cSTobias Grosser // GPU mapping strategy. 129dfe4e7cSTobias Grosser // 139dfe4e7cSTobias Grosser //===----------------------------------------------------------------------===// 149dfe4e7cSTobias Grosser 159dfe4e7cSTobias Grosser #include "polly/CodeGen/IslNodeBuilder.h" 1638fc0aedSTobias Grosser #include "polly/CodeGen/Utils.h" 179dfe4e7cSTobias Grosser #include "polly/DependenceInfo.h" 189dfe4e7cSTobias Grosser #include "polly/LinkAllPasses.h" 19f384594dSTobias Grosser #include "polly/Options.h" 20629109b6STobias Grosser #include "polly/ScopDetection.h" 219dfe4e7cSTobias Grosser #include "polly/ScopInfo.h" 22edb885cbSTobias Grosser #include "polly/Support/SCEVValidator.h" 2374dc3cb4STobias Grosser #include "llvm/ADT/PostOrderIterator.h" 249dfe4e7cSTobias Grosser #include "llvm/Analysis/AliasAnalysis.h" 259dfe4e7cSTobias Grosser #include "llvm/Analysis/BasicAliasAnalysis.h" 269dfe4e7cSTobias Grosser #include "llvm/Analysis/GlobalsModRef.h" 279dfe4e7cSTobias Grosser #include "llvm/Analysis/PostDominators.h" 289dfe4e7cSTobias Grosser #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 2974dc3cb4STobias Grosser #include "llvm/Analysis/TargetLibraryInfo.h" 3074dc3cb4STobias Grosser #include "llvm/Analysis/TargetTransformInfo.h" 3174dc3cb4STobias Grosser #include "llvm/IR/LegacyPassManager.h" 32e1a98343STobias Grosser #include "llvm/IR/Verifier.h" 3374dc3cb4STobias Grosser #include "llvm/Support/TargetRegistry.h" 3474dc3cb4STobias Grosser #include "llvm/Support/TargetSelect.h" 3574dc3cb4STobias Grosser #include "llvm/Target/TargetMachine.h" 369a18d559STobias Grosser #include "llvm/Transforms/IPO/PassManagerBuilder.h" 379dfe4e7cSTobias Grosser 38f384594dSTobias Grosser #include "isl/union_map.h" 39f384594dSTobias Grosser 40e938517eSTobias Grosser extern "C" { 41a56f8f8eSTobias Grosser #include "ppcg/cuda.h" 42a56f8f8eSTobias Grosser #include "ppcg/gpu.h" 43a56f8f8eSTobias Grosser #include "ppcg/gpu_print.h" 44a56f8f8eSTobias Grosser #include "ppcg/ppcg.h" 45a56f8f8eSTobias Grosser #include "ppcg/schedule.h" 46e938517eSTobias Grosser } 47e938517eSTobias Grosser 489dfe4e7cSTobias Grosser #include "llvm/Support/Debug.h" 499dfe4e7cSTobias Grosser 509dfe4e7cSTobias Grosser using namespace polly; 519dfe4e7cSTobias Grosser using namespace llvm; 529dfe4e7cSTobias Grosser 539dfe4e7cSTobias Grosser #define DEBUG_TYPE "polly-codegen-ppcg" 549dfe4e7cSTobias Grosser 55f384594dSTobias Grosser static cl::opt<bool> DumpSchedule("polly-acc-dump-schedule", 56f384594dSTobias Grosser cl::desc("Dump the computed GPU Schedule"), 57681bd568STobias Grosser cl::Hidden, cl::init(false), cl::ZeroOrMore, 58f384594dSTobias Grosser cl::cat(PollyCategory)); 5969b46751STobias Grosser 6069b46751STobias Grosser static cl::opt<bool> 6169b46751STobias Grosser DumpCode("polly-acc-dump-code", 6269b46751STobias Grosser cl::desc("Dump C code describing the GPU mapping"), cl::Hidden, 6369b46751STobias Grosser cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 6469b46751STobias Grosser 6532837fe3STobias Grosser static cl::opt<bool> DumpKernelIR("polly-acc-dump-kernel-ir", 6632837fe3STobias Grosser cl::desc("Dump the kernel LLVM-IR"), 6732837fe3STobias Grosser cl::Hidden, cl::init(false), cl::ZeroOrMore, 6832837fe3STobias Grosser cl::cat(PollyCategory)); 6932837fe3STobias Grosser 7074dc3cb4STobias Grosser static cl::opt<bool> DumpKernelASM("polly-acc-dump-kernel-asm", 7174dc3cb4STobias Grosser cl::desc("Dump the kernel assembly code"), 7274dc3cb4STobias Grosser cl::Hidden, cl::init(false), cl::ZeroOrMore, 7374dc3cb4STobias Grosser cl::cat(PollyCategory)); 7474dc3cb4STobias Grosser 7574dc3cb4STobias Grosser static cl::opt<bool> FastMath("polly-acc-fastmath", 7674dc3cb4STobias Grosser cl::desc("Allow unsafe math optimizations"), 7774dc3cb4STobias Grosser cl::Hidden, cl::init(false), cl::ZeroOrMore, 7874dc3cb4STobias Grosser cl::cat(PollyCategory)); 79b513b491STobias Grosser static cl::opt<bool> SharedMemory("polly-acc-use-shared", 80b513b491STobias Grosser cl::desc("Use shared memory"), cl::Hidden, 81b513b491STobias Grosser cl::init(false), cl::ZeroOrMore, 82b513b491STobias Grosser cl::cat(PollyCategory)); 83*130ca30fSTobias Grosser static cl::opt<bool> PrivateMemory("polly-acc-use-private", 84*130ca30fSTobias Grosser cl::desc("Use private memory"), cl::Hidden, 85*130ca30fSTobias Grosser cl::init(false), cl::ZeroOrMore, 86*130ca30fSTobias Grosser cl::cat(PollyCategory)); 8774dc3cb4STobias Grosser 8874dc3cb4STobias Grosser static cl::opt<std::string> 8974dc3cb4STobias Grosser CudaVersion("polly-acc-cuda-version", 9074dc3cb4STobias Grosser cl::desc("The CUDA version to compile for"), cl::Hidden, 9174dc3cb4STobias Grosser cl::init("sm_30"), cl::ZeroOrMore, cl::cat(PollyCategory)); 9274dc3cb4STobias Grosser 9360c60025STobias Grosser /// Create the ast expressions for a ScopStmt. 9460c60025STobias Grosser /// 9560c60025STobias Grosser /// This function is a callback for to generate the ast expressions for each 9660c60025STobias Grosser /// of the scheduled ScopStmts. 9760c60025STobias Grosser static __isl_give isl_id_to_ast_expr *pollyBuildAstExprForStmt( 98edb885cbSTobias Grosser void *StmtT, isl_ast_build *Build, 9960c60025STobias Grosser isl_multi_pw_aff *(*FunctionIndex)(__isl_take isl_multi_pw_aff *MPA, 10060c60025STobias Grosser isl_id *Id, void *User), 10160c60025STobias Grosser void *UserIndex, 10260c60025STobias Grosser isl_ast_expr *(*FunctionExpr)(isl_ast_expr *Expr, isl_id *Id, void *User), 103edb885cbSTobias Grosser void *UserExpr) { 10460c60025STobias Grosser 105edb885cbSTobias Grosser ScopStmt *Stmt = (ScopStmt *)StmtT; 10660c60025STobias Grosser 107edb885cbSTobias Grosser isl_ctx *Ctx; 108edb885cbSTobias Grosser 109edb885cbSTobias Grosser if (!Stmt || !Build) 110edb885cbSTobias Grosser return NULL; 111edb885cbSTobias Grosser 112edb885cbSTobias Grosser Ctx = isl_ast_build_get_ctx(Build); 113edb885cbSTobias Grosser isl_id_to_ast_expr *RefToExpr = isl_id_to_ast_expr_alloc(Ctx, 0); 114edb885cbSTobias Grosser 115edb885cbSTobias Grosser for (MemoryAccess *Acc : *Stmt) { 116edb885cbSTobias Grosser isl_map *AddrFunc = Acc->getAddressFunction(); 117edb885cbSTobias Grosser AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain()); 118edb885cbSTobias Grosser isl_id *RefId = Acc->getId(); 119edb885cbSTobias Grosser isl_pw_multi_aff *PMA = isl_pw_multi_aff_from_map(AddrFunc); 120edb885cbSTobias Grosser isl_multi_pw_aff *MPA = isl_multi_pw_aff_from_pw_multi_aff(PMA); 121edb885cbSTobias Grosser MPA = isl_multi_pw_aff_coalesce(MPA); 122edb885cbSTobias Grosser MPA = FunctionIndex(MPA, RefId, UserIndex); 123edb885cbSTobias Grosser isl_ast_expr *Access = isl_ast_build_access_from_multi_pw_aff(Build, MPA); 124edb885cbSTobias Grosser Access = FunctionExpr(Access, RefId, UserExpr); 125edb885cbSTobias Grosser RefToExpr = isl_id_to_ast_expr_set(RefToExpr, RefId, Access); 126edb885cbSTobias Grosser } 127edb885cbSTobias Grosser 128edb885cbSTobias Grosser return RefToExpr; 12960c60025STobias Grosser } 130f384594dSTobias Grosser 13138fc0aedSTobias Grosser /// Generate code for a GPU specific isl AST. 13238fc0aedSTobias Grosser /// 13338fc0aedSTobias Grosser /// The GPUNodeBuilder augments the general existing IslNodeBuilder, which 13438fc0aedSTobias Grosser /// generates code for general-prupose AST nodes, with special functionality 13538fc0aedSTobias Grosser /// for generating GPU specific user nodes. 13638fc0aedSTobias Grosser /// 13738fc0aedSTobias Grosser /// @see GPUNodeBuilder::createUser 13838fc0aedSTobias Grosser class GPUNodeBuilder : public IslNodeBuilder { 13938fc0aedSTobias Grosser public: 14038fc0aedSTobias Grosser GPUNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, 14138fc0aedSTobias Grosser const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, 14232837fe3STobias Grosser DominatorTree &DT, Scop &S, gpu_prog *Prog) 143edb885cbSTobias Grosser : IslNodeBuilder(Builder, Annotator, P, DL, LI, SE, DT, S), Prog(Prog) { 144edb885cbSTobias Grosser getExprBuilder().setIDToSAI(&IDToSAI); 145edb885cbSTobias Grosser } 14638fc0aedSTobias Grosser 147fa7b0802STobias Grosser /// Create after-run-time-check initialization code. 148fa7b0802STobias Grosser void initializeAfterRTH(); 149fa7b0802STobias Grosser 150fa7b0802STobias Grosser /// Finalize the generated scop. 151fa7b0802STobias Grosser virtual void finalize(); 152fa7b0802STobias Grosser 15338fc0aedSTobias Grosser private: 15474dc3cb4STobias Grosser /// A vector of array base pointers for which a new ScopArrayInfo was created. 15574dc3cb4STobias Grosser /// 15674dc3cb4STobias Grosser /// This vector is used to delete the ScopArrayInfo when it is not needed any 15774dc3cb4STobias Grosser /// more. 15874dc3cb4STobias Grosser std::vector<Value *> LocalArrays; 15974dc3cb4STobias Grosser 16013c78e4dSTobias Grosser /// A map from ScopArrays to their corresponding device allocations. 16113c78e4dSTobias Grosser std::map<ScopArrayInfo *, Value *> DeviceAllocations; 1627287aeddSTobias Grosser 163fa7b0802STobias Grosser /// The current GPU context. 164fa7b0802STobias Grosser Value *GPUContext; 165fa7b0802STobias Grosser 166b513b491STobias Grosser /// The set of isl_ids allocated in the kernel 167b513b491STobias Grosser std::vector<isl_id *> KernelIds; 168b513b491STobias Grosser 16932837fe3STobias Grosser /// A module containing GPU code. 17032837fe3STobias Grosser /// 17132837fe3STobias Grosser /// This pointer is only set in case we are currently generating GPU code. 17232837fe3STobias Grosser std::unique_ptr<Module> GPUModule; 17332837fe3STobias Grosser 17432837fe3STobias Grosser /// The GPU program we generate code for. 17532837fe3STobias Grosser gpu_prog *Prog; 17632837fe3STobias Grosser 177472f9654STobias Grosser /// Class to free isl_ids. 178472f9654STobias Grosser class IslIdDeleter { 179472f9654STobias Grosser public: 180472f9654STobias Grosser void operator()(__isl_take isl_id *Id) { isl_id_free(Id); }; 181472f9654STobias Grosser }; 182472f9654STobias Grosser 183472f9654STobias Grosser /// A set containing all isl_ids allocated in a GPU kernel. 184472f9654STobias Grosser /// 185472f9654STobias Grosser /// By releasing this set all isl_ids will be freed. 186472f9654STobias Grosser std::set<std::unique_ptr<isl_id, IslIdDeleter>> KernelIDs; 187472f9654STobias Grosser 188edb885cbSTobias Grosser IslExprBuilder::IDToScopArrayInfoTy IDToSAI; 189edb885cbSTobias Grosser 19038fc0aedSTobias Grosser /// Create code for user-defined AST nodes. 19138fc0aedSTobias Grosser /// 19238fc0aedSTobias Grosser /// These AST nodes can be of type: 19338fc0aedSTobias Grosser /// 19438fc0aedSTobias Grosser /// - ScopStmt: A computational statement (TODO) 19538fc0aedSTobias Grosser /// - Kernel: A GPU kernel call (TODO) 19613c78e4dSTobias Grosser /// - Data-Transfer: A GPU <-> CPU data-transfer 1975260c041STobias Grosser /// - In-kernel synchronization 1985260c041STobias Grosser /// - In-kernel memory copy statement 19938fc0aedSTobias Grosser /// 2001fb9b64dSTobias Grosser /// @param UserStmt The ast node to generate code for. 2011fb9b64dSTobias Grosser virtual void createUser(__isl_take isl_ast_node *UserStmt); 20232837fe3STobias Grosser 20313c78e4dSTobias Grosser enum DataDirection { HOST_TO_DEVICE, DEVICE_TO_HOST }; 20413c78e4dSTobias Grosser 20513c78e4dSTobias Grosser /// Create code for a data transfer statement 20613c78e4dSTobias Grosser /// 20713c78e4dSTobias Grosser /// @param TransferStmt The data transfer statement. 20813c78e4dSTobias Grosser /// @param Direction The direction in which to transfer data. 20913c78e4dSTobias Grosser void createDataTransfer(__isl_take isl_ast_node *TransferStmt, 21013c78e4dSTobias Grosser enum DataDirection Direction); 21113c78e4dSTobias Grosser 212edb885cbSTobias Grosser /// Find llvm::Values referenced in GPU kernel. 213edb885cbSTobias Grosser /// 214edb885cbSTobias Grosser /// @param Kernel The kernel to scan for llvm::Values 215edb885cbSTobias Grosser /// 216edb885cbSTobias Grosser /// @returns A set of values referenced by the kernel. 217edb885cbSTobias Grosser SetVector<Value *> getReferencesInKernel(ppcg_kernel *Kernel); 218edb885cbSTobias Grosser 21979a947c2STobias Grosser /// Compute the sizes of the execution grid for a given kernel. 22079a947c2STobias Grosser /// 22179a947c2STobias Grosser /// @param Kernel The kernel to compute grid sizes for. 22279a947c2STobias Grosser /// 22379a947c2STobias Grosser /// @returns A tuple with grid sizes for X and Y dimension 22479a947c2STobias Grosser std::tuple<Value *, Value *> getGridSizes(ppcg_kernel *Kernel); 22579a947c2STobias Grosser 22679a947c2STobias Grosser /// Compute the sizes of the thread blocks for a given kernel. 22779a947c2STobias Grosser /// 22879a947c2STobias Grosser /// @param Kernel The kernel to compute thread block sizes for. 22979a947c2STobias Grosser /// 23079a947c2STobias Grosser /// @returns A tuple with thread block sizes for X, Y, and Z dimensions. 23179a947c2STobias Grosser std::tuple<Value *, Value *, Value *> getBlockSizes(ppcg_kernel *Kernel); 23279a947c2STobias Grosser 23379a947c2STobias Grosser /// Create kernel launch parameters. 23479a947c2STobias Grosser /// 23579a947c2STobias Grosser /// @param Kernel The kernel to create parameters for. 23679a947c2STobias Grosser /// @param F The kernel function that has been created. 23757693272STobias Grosser /// @param SubtreeValues The set of llvm::Values referenced by this kernel. 23879a947c2STobias Grosser /// 23979a947c2STobias Grosser /// @returns A stack allocated array with pointers to the parameter 24079a947c2STobias Grosser /// values that are passed to the kernel. 24157693272STobias Grosser Value *createLaunchParameters(ppcg_kernel *Kernel, Function *F, 24257693272STobias Grosser SetVector<Value *> SubtreeValues); 24379a947c2STobias Grosser 244b513b491STobias Grosser /// Create declarations for kernel variable. 245b513b491STobias Grosser /// 246b513b491STobias Grosser /// This includes shared memory declarations. 247b513b491STobias Grosser /// 248b513b491STobias Grosser /// @param Kernel The kernel definition to create variables for. 249b513b491STobias Grosser /// @param FN The function into which to generate the variables. 250b513b491STobias Grosser void createKernelVariables(ppcg_kernel *Kernel, Function *FN); 251b513b491STobias Grosser 25232837fe3STobias Grosser /// Create GPU kernel. 25332837fe3STobias Grosser /// 25432837fe3STobias Grosser /// Code generate the kernel described by @p KernelStmt. 25532837fe3STobias Grosser /// 25632837fe3STobias Grosser /// @param KernelStmt The ast node to generate kernel code for. 25732837fe3STobias Grosser void createKernel(__isl_take isl_ast_node *KernelStmt); 25832837fe3STobias Grosser 25913c78e4dSTobias Grosser /// Generate code that computes the size of an array. 26013c78e4dSTobias Grosser /// 26113c78e4dSTobias Grosser /// @param Array The array for which to compute a size. 26213c78e4dSTobias Grosser Value *getArraySize(gpu_array_info *Array); 26313c78e4dSTobias Grosser 26400bb5a99STobias Grosser /// Prepare the kernel arguments for kernel code generation 26500bb5a99STobias Grosser /// 26600bb5a99STobias Grosser /// @param Kernel The kernel to generate code for. 26700bb5a99STobias Grosser /// @param FN The function created for the kernel. 26800bb5a99STobias Grosser void prepareKernelArguments(ppcg_kernel *Kernel, Function *FN); 26900bb5a99STobias Grosser 27032837fe3STobias Grosser /// Create kernel function. 27132837fe3STobias Grosser /// 27232837fe3STobias Grosser /// Create a kernel function located in a newly created module that can serve 27332837fe3STobias Grosser /// as target for device code generation. Set the Builder to point to the 27432837fe3STobias Grosser /// start block of this newly created function. 27532837fe3STobias Grosser /// 27632837fe3STobias Grosser /// @param Kernel The kernel to generate code for. 277edb885cbSTobias Grosser /// @param SubtreeValues The set of llvm::Values referenced by this kernel. 278edb885cbSTobias Grosser void createKernelFunction(ppcg_kernel *Kernel, 279edb885cbSTobias Grosser SetVector<Value *> &SubtreeValues); 28032837fe3STobias Grosser 28132837fe3STobias Grosser /// Create the declaration of a kernel function. 28232837fe3STobias Grosser /// 28332837fe3STobias Grosser /// The kernel function takes as arguments: 28432837fe3STobias Grosser /// 28532837fe3STobias Grosser /// - One i8 pointer for each external array reference used in the kernel. 286f6044bd0STobias Grosser /// - Host iterators 287c84a1995STobias Grosser /// - Parameters 28832837fe3STobias Grosser /// - Other LLVM Value references (TODO) 28932837fe3STobias Grosser /// 29032837fe3STobias Grosser /// @param Kernel The kernel to generate the function declaration for. 291edb885cbSTobias Grosser /// @param SubtreeValues The set of llvm::Values referenced by this kernel. 292edb885cbSTobias Grosser /// 29332837fe3STobias Grosser /// @returns The newly declared function. 294edb885cbSTobias Grosser Function *createKernelFunctionDecl(ppcg_kernel *Kernel, 295edb885cbSTobias Grosser SetVector<Value *> &SubtreeValues); 29632837fe3STobias Grosser 297472f9654STobias Grosser /// Insert intrinsic functions to obtain thread and block ids. 298472f9654STobias Grosser /// 299472f9654STobias Grosser /// @param The kernel to generate the intrinsic functions for. 300472f9654STobias Grosser void insertKernelIntrinsics(ppcg_kernel *Kernel); 301472f9654STobias Grosser 302b513b491STobias Grosser /// Create a global-to-shared or shared-to-global copy statement. 303b513b491STobias Grosser /// 304b513b491STobias Grosser /// @param CopyStmt The copy statement to generate code for 305b513b491STobias Grosser void createKernelCopy(ppcg_kernel_stmt *CopyStmt); 306b513b491STobias Grosser 307edb885cbSTobias Grosser /// Create code for a ScopStmt called in @p Expr. 308edb885cbSTobias Grosser /// 309edb885cbSTobias Grosser /// @param Expr The expression containing the call. 310edb885cbSTobias Grosser /// @param KernelStmt The kernel statement referenced in the call. 311edb885cbSTobias Grosser void createScopStmt(isl_ast_expr *Expr, ppcg_kernel_stmt *KernelStmt); 312edb885cbSTobias Grosser 3135260c041STobias Grosser /// Create an in-kernel synchronization call. 3145260c041STobias Grosser void createKernelSync(); 3155260c041STobias Grosser 31674dc3cb4STobias Grosser /// Create a PTX assembly string for the current GPU kernel. 31774dc3cb4STobias Grosser /// 31874dc3cb4STobias Grosser /// @returns A string containing the corresponding PTX assembly code. 31974dc3cb4STobias Grosser std::string createKernelASM(); 32074dc3cb4STobias Grosser 32174dc3cb4STobias Grosser /// Remove references from the dominator tree to the kernel function @p F. 32274dc3cb4STobias Grosser /// 32374dc3cb4STobias Grosser /// @param F The function to remove references to. 32474dc3cb4STobias Grosser void clearDominators(Function *F); 32574dc3cb4STobias Grosser 32674dc3cb4STobias Grosser /// Remove references from scalar evolution to the kernel function @p F. 32774dc3cb4STobias Grosser /// 32874dc3cb4STobias Grosser /// @param F The function to remove references to. 32974dc3cb4STobias Grosser void clearScalarEvolution(Function *F); 33074dc3cb4STobias Grosser 33174dc3cb4STobias Grosser /// Remove references from loop info to the kernel function @p F. 33274dc3cb4STobias Grosser /// 33374dc3cb4STobias Grosser /// @param F The function to remove references to. 33474dc3cb4STobias Grosser void clearLoops(Function *F); 33574dc3cb4STobias Grosser 33632837fe3STobias Grosser /// Finalize the generation of the kernel function. 33732837fe3STobias Grosser /// 33832837fe3STobias Grosser /// Free the LLVM-IR module corresponding to the kernel and -- if requested -- 33932837fe3STobias Grosser /// dump its IR to stderr. 34057793596STobias Grosser /// 34157793596STobias Grosser /// @returns The Assembly string of the kernel. 34257793596STobias Grosser std::string finalizeKernelFunction(); 343fa7b0802STobias Grosser 3447287aeddSTobias Grosser /// Create code that allocates memory to store arrays on device. 345fa7b0802STobias Grosser void allocateDeviceArrays(); 346fa7b0802STobias Grosser 3477287aeddSTobias Grosser /// Free all allocated device arrays. 3487287aeddSTobias Grosser void freeDeviceArrays(); 3497287aeddSTobias Grosser 350fa7b0802STobias Grosser /// Create a call to initialize the GPU context. 351fa7b0802STobias Grosser /// 352fa7b0802STobias Grosser /// @returns A pointer to the newly initialized context. 353fa7b0802STobias Grosser Value *createCallInitContext(); 354fa7b0802STobias Grosser 35579a947c2STobias Grosser /// Create a call to get the device pointer for a kernel allocation. 35679a947c2STobias Grosser /// 35779a947c2STobias Grosser /// @param Allocation The Polly GPU allocation 35879a947c2STobias Grosser /// 35979a947c2STobias Grosser /// @returns The device parameter corresponding to this allocation. 36079a947c2STobias Grosser Value *createCallGetDevicePtr(Value *Allocation); 36179a947c2STobias Grosser 362fa7b0802STobias Grosser /// Create a call to free the GPU context. 363fa7b0802STobias Grosser /// 364fa7b0802STobias Grosser /// @param Context A pointer to an initialized GPU context. 365fa7b0802STobias Grosser void createCallFreeContext(Value *Context); 366fa7b0802STobias Grosser 3677287aeddSTobias Grosser /// Create a call to allocate memory on the device. 3687287aeddSTobias Grosser /// 3697287aeddSTobias Grosser /// @param Size The size of memory to allocate 3707287aeddSTobias Grosser /// 3717287aeddSTobias Grosser /// @returns A pointer that identifies this allocation. 372fa7b0802STobias Grosser Value *createCallAllocateMemoryForDevice(Value *Size); 3737287aeddSTobias Grosser 3747287aeddSTobias Grosser /// Create a call to free a device array. 3757287aeddSTobias Grosser /// 3767287aeddSTobias Grosser /// @param Array The device array to free. 3777287aeddSTobias Grosser void createCallFreeDeviceMemory(Value *Array); 37813c78e4dSTobias Grosser 37913c78e4dSTobias Grosser /// Create a call to copy data from host to device. 38013c78e4dSTobias Grosser /// 38113c78e4dSTobias Grosser /// @param HostPtr A pointer to the host data that should be copied. 38213c78e4dSTobias Grosser /// @param DevicePtr A device pointer specifying the location to copy to. 38313c78e4dSTobias Grosser void createCallCopyFromHostToDevice(Value *HostPtr, Value *DevicePtr, 38413c78e4dSTobias Grosser Value *Size); 38513c78e4dSTobias Grosser 38613c78e4dSTobias Grosser /// Create a call to copy data from device to host. 38713c78e4dSTobias Grosser /// 38813c78e4dSTobias Grosser /// @param DevicePtr A pointer to the device data that should be copied. 38913c78e4dSTobias Grosser /// @param HostPtr A host pointer specifying the location to copy to. 39013c78e4dSTobias Grosser void createCallCopyFromDeviceToHost(Value *DevicePtr, Value *HostPtr, 39113c78e4dSTobias Grosser Value *Size); 39257793596STobias Grosser 39357793596STobias Grosser /// Create a call to get a kernel from an assembly string. 39457793596STobias Grosser /// 39557793596STobias Grosser /// @param Buffer The string describing the kernel. 39657793596STobias Grosser /// @param Entry The name of the kernel function to call. 39757793596STobias Grosser /// 39857793596STobias Grosser /// @returns A pointer to a kernel object 39957793596STobias Grosser Value *createCallGetKernel(Value *Buffer, Value *Entry); 40057793596STobias Grosser 40157793596STobias Grosser /// Create a call to free a GPU kernel. 40257793596STobias Grosser /// 40357793596STobias Grosser /// @param GPUKernel THe kernel to free. 40457793596STobias Grosser void createCallFreeKernel(Value *GPUKernel); 40579a947c2STobias Grosser 40679a947c2STobias Grosser /// Create a call to launch a GPU kernel. 40779a947c2STobias Grosser /// 40879a947c2STobias Grosser /// @param GPUKernel The kernel to launch. 40979a947c2STobias Grosser /// @param GridDimX The size of the first grid dimension. 41079a947c2STobias Grosser /// @param GridDimY The size of the second grid dimension. 41179a947c2STobias Grosser /// @param GridBlockX The size of the first block dimension. 41279a947c2STobias Grosser /// @param GridBlockY The size of the second block dimension. 41379a947c2STobias Grosser /// @param GridBlockZ The size of the third block dimension. 41479a947c2STobias Grosser /// @param Paramters A pointer to an array that contains itself pointers to 41579a947c2STobias Grosser /// the parameter values passed for each kernel argument. 41679a947c2STobias Grosser void createCallLaunchKernel(Value *GPUKernel, Value *GridDimX, 41779a947c2STobias Grosser Value *GridDimY, Value *BlockDimX, 41879a947c2STobias Grosser Value *BlockDimY, Value *BlockDimZ, 41979a947c2STobias Grosser Value *Parameters); 4201fb9b64dSTobias Grosser }; 4211fb9b64dSTobias Grosser 422fa7b0802STobias Grosser void GPUNodeBuilder::initializeAfterRTH() { 423fa7b0802STobias Grosser GPUContext = createCallInitContext(); 424fa7b0802STobias Grosser allocateDeviceArrays(); 425fa7b0802STobias Grosser } 426fa7b0802STobias Grosser 427fa7b0802STobias Grosser void GPUNodeBuilder::finalize() { 4287287aeddSTobias Grosser freeDeviceArrays(); 429fa7b0802STobias Grosser createCallFreeContext(GPUContext); 430fa7b0802STobias Grosser IslNodeBuilder::finalize(); 431fa7b0802STobias Grosser } 432fa7b0802STobias Grosser 433fa7b0802STobias Grosser void GPUNodeBuilder::allocateDeviceArrays() { 434fa7b0802STobias Grosser isl_ast_build *Build = isl_ast_build_from_context(S.getContext()); 435fa7b0802STobias Grosser 436fa7b0802STobias Grosser for (int i = 0; i < Prog->n_array; ++i) { 437fa7b0802STobias Grosser gpu_array_info *Array = &Prog->array[i]; 43813c78e4dSTobias Grosser auto *ScopArray = (ScopArrayInfo *)Array->user; 4397287aeddSTobias Grosser std::string DevArrayName("p_dev_array_"); 4407287aeddSTobias Grosser DevArrayName.append(Array->name); 441fa7b0802STobias Grosser 44213c78e4dSTobias Grosser Value *ArraySize = getArraySize(Array); 4437287aeddSTobias Grosser Value *DevArray = createCallAllocateMemoryForDevice(ArraySize); 4447287aeddSTobias Grosser DevArray->setName(DevArrayName); 44513c78e4dSTobias Grosser DeviceAllocations[ScopArray] = DevArray; 446fa7b0802STobias Grosser } 447fa7b0802STobias Grosser 448fa7b0802STobias Grosser isl_ast_build_free(Build); 449fa7b0802STobias Grosser } 450fa7b0802STobias Grosser 4517287aeddSTobias Grosser void GPUNodeBuilder::freeDeviceArrays() { 45213c78e4dSTobias Grosser for (auto &Array : DeviceAllocations) 45313c78e4dSTobias Grosser createCallFreeDeviceMemory(Array.second); 4547287aeddSTobias Grosser } 4557287aeddSTobias Grosser 45657793596STobias Grosser Value *GPUNodeBuilder::createCallGetKernel(Value *Buffer, Value *Entry) { 45757793596STobias Grosser const char *Name = "polly_getKernel"; 45857793596STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 45957793596STobias Grosser Function *F = M->getFunction(Name); 46057793596STobias Grosser 46157793596STobias Grosser // If F is not available, declare it. 46257793596STobias Grosser if (!F) { 46357793596STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 46457793596STobias Grosser std::vector<Type *> Args; 46557793596STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 46657793596STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 46757793596STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); 46857793596STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 46957793596STobias Grosser } 47057793596STobias Grosser 47157793596STobias Grosser return Builder.CreateCall(F, {Buffer, Entry}); 47257793596STobias Grosser } 47357793596STobias Grosser 47479a947c2STobias Grosser Value *GPUNodeBuilder::createCallGetDevicePtr(Value *Allocation) { 47579a947c2STobias Grosser const char *Name = "polly_getDevicePtr"; 47679a947c2STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 47779a947c2STobias Grosser Function *F = M->getFunction(Name); 47879a947c2STobias Grosser 47979a947c2STobias Grosser // If F is not available, declare it. 48079a947c2STobias Grosser if (!F) { 48179a947c2STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 48279a947c2STobias Grosser std::vector<Type *> Args; 48379a947c2STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 48479a947c2STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); 48579a947c2STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 48679a947c2STobias Grosser } 48779a947c2STobias Grosser 48879a947c2STobias Grosser return Builder.CreateCall(F, {Allocation}); 48979a947c2STobias Grosser } 49079a947c2STobias Grosser 49179a947c2STobias Grosser void GPUNodeBuilder::createCallLaunchKernel(Value *GPUKernel, Value *GridDimX, 49279a947c2STobias Grosser Value *GridDimY, Value *BlockDimX, 49379a947c2STobias Grosser Value *BlockDimY, Value *BlockDimZ, 49479a947c2STobias Grosser Value *Parameters) { 49579a947c2STobias Grosser const char *Name = "polly_launchKernel"; 49679a947c2STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 49779a947c2STobias Grosser Function *F = M->getFunction(Name); 49879a947c2STobias Grosser 49979a947c2STobias Grosser // If F is not available, declare it. 50079a947c2STobias Grosser if (!F) { 50179a947c2STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 50279a947c2STobias Grosser std::vector<Type *> Args; 50379a947c2STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 50479a947c2STobias Grosser Args.push_back(Builder.getInt32Ty()); 50579a947c2STobias Grosser Args.push_back(Builder.getInt32Ty()); 50679a947c2STobias Grosser Args.push_back(Builder.getInt32Ty()); 50779a947c2STobias Grosser Args.push_back(Builder.getInt32Ty()); 50879a947c2STobias Grosser Args.push_back(Builder.getInt32Ty()); 50979a947c2STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 51079a947c2STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 51179a947c2STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 51279a947c2STobias Grosser } 51379a947c2STobias Grosser 51479a947c2STobias Grosser Builder.CreateCall(F, {GPUKernel, GridDimX, GridDimY, BlockDimX, BlockDimY, 51579a947c2STobias Grosser BlockDimZ, Parameters}); 51679a947c2STobias Grosser } 51779a947c2STobias Grosser 51857793596STobias Grosser void GPUNodeBuilder::createCallFreeKernel(Value *GPUKernel) { 51957793596STobias Grosser const char *Name = "polly_freeKernel"; 52057793596STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 52157793596STobias Grosser Function *F = M->getFunction(Name); 52257793596STobias Grosser 52357793596STobias Grosser // If F is not available, declare it. 52457793596STobias Grosser if (!F) { 52557793596STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 52657793596STobias Grosser std::vector<Type *> Args; 52757793596STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 52857793596STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 52957793596STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 53057793596STobias Grosser } 53157793596STobias Grosser 53257793596STobias Grosser Builder.CreateCall(F, {GPUKernel}); 53357793596STobias Grosser } 53457793596STobias Grosser 5357287aeddSTobias Grosser void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) { 5367287aeddSTobias Grosser const char *Name = "polly_freeDeviceMemory"; 5377287aeddSTobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 5387287aeddSTobias Grosser Function *F = M->getFunction(Name); 5397287aeddSTobias Grosser 5407287aeddSTobias Grosser // If F is not available, declare it. 5417287aeddSTobias Grosser if (!F) { 5427287aeddSTobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 5437287aeddSTobias Grosser std::vector<Type *> Args; 5447287aeddSTobias Grosser Args.push_back(Builder.getInt8PtrTy()); 5457287aeddSTobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 5467287aeddSTobias Grosser F = Function::Create(Ty, Linkage, Name, M); 5477287aeddSTobias Grosser } 5487287aeddSTobias Grosser 5497287aeddSTobias Grosser Builder.CreateCall(F, {Array}); 5507287aeddSTobias Grosser } 5517287aeddSTobias Grosser 552fa7b0802STobias Grosser Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) { 553fa7b0802STobias Grosser const char *Name = "polly_allocateMemoryForDevice"; 554fa7b0802STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 555fa7b0802STobias Grosser Function *F = M->getFunction(Name); 556fa7b0802STobias Grosser 557fa7b0802STobias Grosser // If F is not available, declare it. 558fa7b0802STobias Grosser if (!F) { 559fa7b0802STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 560fa7b0802STobias Grosser std::vector<Type *> Args; 561fa7b0802STobias Grosser Args.push_back(Builder.getInt64Ty()); 562fa7b0802STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); 563fa7b0802STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 564fa7b0802STobias Grosser } 565fa7b0802STobias Grosser 566fa7b0802STobias Grosser return Builder.CreateCall(F, {Size}); 567fa7b0802STobias Grosser } 568fa7b0802STobias Grosser 56913c78e4dSTobias Grosser void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData, 57013c78e4dSTobias Grosser Value *DeviceData, 57113c78e4dSTobias Grosser Value *Size) { 57213c78e4dSTobias Grosser const char *Name = "polly_copyFromHostToDevice"; 57313c78e4dSTobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 57413c78e4dSTobias Grosser Function *F = M->getFunction(Name); 57513c78e4dSTobias Grosser 57613c78e4dSTobias Grosser // If F is not available, declare it. 57713c78e4dSTobias Grosser if (!F) { 57813c78e4dSTobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 57913c78e4dSTobias Grosser std::vector<Type *> Args; 58013c78e4dSTobias Grosser Args.push_back(Builder.getInt8PtrTy()); 58113c78e4dSTobias Grosser Args.push_back(Builder.getInt8PtrTy()); 58213c78e4dSTobias Grosser Args.push_back(Builder.getInt64Ty()); 58313c78e4dSTobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 58413c78e4dSTobias Grosser F = Function::Create(Ty, Linkage, Name, M); 58513c78e4dSTobias Grosser } 58613c78e4dSTobias Grosser 58713c78e4dSTobias Grosser Builder.CreateCall(F, {HostData, DeviceData, Size}); 58813c78e4dSTobias Grosser } 58913c78e4dSTobias Grosser 59013c78e4dSTobias Grosser void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData, 59113c78e4dSTobias Grosser Value *HostData, 59213c78e4dSTobias Grosser Value *Size) { 59313c78e4dSTobias Grosser const char *Name = "polly_copyFromDeviceToHost"; 59413c78e4dSTobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 59513c78e4dSTobias Grosser Function *F = M->getFunction(Name); 59613c78e4dSTobias Grosser 59713c78e4dSTobias Grosser // If F is not available, declare it. 59813c78e4dSTobias Grosser if (!F) { 59913c78e4dSTobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 60013c78e4dSTobias Grosser std::vector<Type *> Args; 60113c78e4dSTobias Grosser Args.push_back(Builder.getInt8PtrTy()); 60213c78e4dSTobias Grosser Args.push_back(Builder.getInt8PtrTy()); 60313c78e4dSTobias Grosser Args.push_back(Builder.getInt64Ty()); 60413c78e4dSTobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 60513c78e4dSTobias Grosser F = Function::Create(Ty, Linkage, Name, M); 60613c78e4dSTobias Grosser } 60713c78e4dSTobias Grosser 60813c78e4dSTobias Grosser Builder.CreateCall(F, {DeviceData, HostData, Size}); 60913c78e4dSTobias Grosser } 61013c78e4dSTobias Grosser 611fa7b0802STobias Grosser Value *GPUNodeBuilder::createCallInitContext() { 612fa7b0802STobias Grosser const char *Name = "polly_initContext"; 613fa7b0802STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 614fa7b0802STobias Grosser Function *F = M->getFunction(Name); 615fa7b0802STobias Grosser 616fa7b0802STobias Grosser // If F is not available, declare it. 617fa7b0802STobias Grosser if (!F) { 618fa7b0802STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 619fa7b0802STobias Grosser std::vector<Type *> Args; 620fa7b0802STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); 621fa7b0802STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 622fa7b0802STobias Grosser } 623fa7b0802STobias Grosser 624fa7b0802STobias Grosser return Builder.CreateCall(F, {}); 625fa7b0802STobias Grosser } 626fa7b0802STobias Grosser 627fa7b0802STobias Grosser void GPUNodeBuilder::createCallFreeContext(Value *Context) { 628fa7b0802STobias Grosser const char *Name = "polly_freeContext"; 629fa7b0802STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 630fa7b0802STobias Grosser Function *F = M->getFunction(Name); 631fa7b0802STobias Grosser 632fa7b0802STobias Grosser // If F is not available, declare it. 633fa7b0802STobias Grosser if (!F) { 634fa7b0802STobias Grosser GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 635fa7b0802STobias Grosser std::vector<Type *> Args; 636fa7b0802STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 637fa7b0802STobias Grosser FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); 638fa7b0802STobias Grosser F = Function::Create(Ty, Linkage, Name, M); 639fa7b0802STobias Grosser } 640fa7b0802STobias Grosser 641fa7b0802STobias Grosser Builder.CreateCall(F, {Context}); 642fa7b0802STobias Grosser } 643fa7b0802STobias Grosser 6445260c041STobias Grosser /// Check if one string is a prefix of another. 6455260c041STobias Grosser /// 6465260c041STobias Grosser /// @param String The string in which to look for the prefix. 6475260c041STobias Grosser /// @param Prefix The prefix to look for. 6485260c041STobias Grosser static bool isPrefix(std::string String, std::string Prefix) { 6495260c041STobias Grosser return String.find(Prefix) == 0; 6505260c041STobias Grosser } 6515260c041STobias Grosser 65213c78e4dSTobias Grosser Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) { 65313c78e4dSTobias Grosser isl_ast_build *Build = isl_ast_build_from_context(S.getContext()); 65413c78e4dSTobias Grosser Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); 65513c78e4dSTobias Grosser 65613c78e4dSTobias Grosser if (!gpu_array_is_scalar(Array)) { 65713c78e4dSTobias Grosser auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]); 65813c78e4dSTobias Grosser isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); 65913c78e4dSTobias Grosser 66013c78e4dSTobias Grosser for (unsigned int i = 1; i < Array->n_index; i++) { 66113c78e4dSTobias Grosser isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]); 66213c78e4dSTobias Grosser isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I); 66313c78e4dSTobias Grosser Res = isl_ast_expr_mul(Res, Expr); 66413c78e4dSTobias Grosser } 66513c78e4dSTobias Grosser 66613c78e4dSTobias Grosser Value *NumElements = ExprBuilder.create(Res); 66713c78e4dSTobias Grosser ArraySize = Builder.CreateMul(ArraySize, NumElements); 66813c78e4dSTobias Grosser } 66913c78e4dSTobias Grosser isl_ast_build_free(Build); 67013c78e4dSTobias Grosser return ArraySize; 67113c78e4dSTobias Grosser } 67213c78e4dSTobias Grosser 67313c78e4dSTobias Grosser void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt, 67413c78e4dSTobias Grosser enum DataDirection Direction) { 67513c78e4dSTobias Grosser isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt); 67613c78e4dSTobias Grosser isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0); 67713c78e4dSTobias Grosser isl_id *Id = isl_ast_expr_get_id(Arg); 67813c78e4dSTobias Grosser auto Array = (gpu_array_info *)isl_id_get_user(Id); 67913c78e4dSTobias Grosser auto ScopArray = (ScopArrayInfo *)(Array->user); 68013c78e4dSTobias Grosser 68113c78e4dSTobias Grosser Value *Size = getArraySize(Array); 68213c78e4dSTobias Grosser Value *HostPtr = ScopArray->getBasePtr(); 68313c78e4dSTobias Grosser 68413c78e4dSTobias Grosser Value *DevPtr = DeviceAllocations[ScopArray]; 68513c78e4dSTobias Grosser 68613c78e4dSTobias Grosser if (gpu_array_is_scalar(Array)) { 68713c78e4dSTobias Grosser HostPtr = Builder.CreateAlloca(ScopArray->getElementType()); 68813c78e4dSTobias Grosser Builder.CreateStore(ScopArray->getBasePtr(), HostPtr); 68913c78e4dSTobias Grosser } 69013c78e4dSTobias Grosser 69113c78e4dSTobias Grosser HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy()); 69213c78e4dSTobias Grosser 69313c78e4dSTobias Grosser if (Direction == HOST_TO_DEVICE) 69413c78e4dSTobias Grosser createCallCopyFromHostToDevice(HostPtr, DevPtr, Size); 69513c78e4dSTobias Grosser else 69613c78e4dSTobias Grosser createCallCopyFromDeviceToHost(DevPtr, HostPtr, Size); 69713c78e4dSTobias Grosser 69813c78e4dSTobias Grosser isl_id_free(Id); 69913c78e4dSTobias Grosser isl_ast_expr_free(Arg); 70013c78e4dSTobias Grosser isl_ast_expr_free(Expr); 70113c78e4dSTobias Grosser isl_ast_node_free(TransferStmt); 70213c78e4dSTobias Grosser } 70313c78e4dSTobias Grosser 7041fb9b64dSTobias Grosser void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) { 70532837fe3STobias Grosser isl_ast_expr *Expr = isl_ast_node_user_get_expr(UserStmt); 70632837fe3STobias Grosser isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 70732837fe3STobias Grosser isl_id *Id = isl_ast_expr_get_id(StmtExpr); 70832837fe3STobias Grosser isl_id_free(Id); 70932837fe3STobias Grosser isl_ast_expr_free(StmtExpr); 71032837fe3STobias Grosser 71132837fe3STobias Grosser const char *Str = isl_id_get_name(Id); 71232837fe3STobias Grosser if (!strcmp(Str, "kernel")) { 71332837fe3STobias Grosser createKernel(UserStmt); 71432837fe3STobias Grosser isl_ast_expr_free(Expr); 71532837fe3STobias Grosser return; 71632837fe3STobias Grosser } 71732837fe3STobias Grosser 71813c78e4dSTobias Grosser if (isPrefix(Str, "to_device")) { 71913c78e4dSTobias Grosser createDataTransfer(UserStmt, HOST_TO_DEVICE); 72032837fe3STobias Grosser isl_ast_expr_free(Expr); 72113c78e4dSTobias Grosser return; 72213c78e4dSTobias Grosser } 72313c78e4dSTobias Grosser 72413c78e4dSTobias Grosser if (isPrefix(Str, "from_device")) { 72513c78e4dSTobias Grosser createDataTransfer(UserStmt, DEVICE_TO_HOST); 72613c78e4dSTobias Grosser isl_ast_expr_free(Expr); 72738fc0aedSTobias Grosser return; 72838fc0aedSTobias Grosser } 72938fc0aedSTobias Grosser 7305260c041STobias Grosser isl_id *Anno = isl_ast_node_get_annotation(UserStmt); 7315260c041STobias Grosser struct ppcg_kernel_stmt *KernelStmt = 7325260c041STobias Grosser (struct ppcg_kernel_stmt *)isl_id_get_user(Anno); 7335260c041STobias Grosser isl_id_free(Anno); 7345260c041STobias Grosser 7355260c041STobias Grosser switch (KernelStmt->type) { 7365260c041STobias Grosser case ppcg_kernel_domain: 737edb885cbSTobias Grosser createScopStmt(Expr, KernelStmt); 7385260c041STobias Grosser isl_ast_node_free(UserStmt); 7395260c041STobias Grosser return; 7405260c041STobias Grosser case ppcg_kernel_copy: 741b513b491STobias Grosser createKernelCopy(KernelStmt); 7425260c041STobias Grosser isl_ast_expr_free(Expr); 7435260c041STobias Grosser isl_ast_node_free(UserStmt); 7445260c041STobias Grosser return; 7455260c041STobias Grosser case ppcg_kernel_sync: 7465260c041STobias Grosser createKernelSync(); 7475260c041STobias Grosser isl_ast_expr_free(Expr); 7485260c041STobias Grosser isl_ast_node_free(UserStmt); 7495260c041STobias Grosser return; 7505260c041STobias Grosser } 7515260c041STobias Grosser 7525260c041STobias Grosser isl_ast_expr_free(Expr); 7535260c041STobias Grosser isl_ast_node_free(UserStmt); 7545260c041STobias Grosser return; 7555260c041STobias Grosser } 756b513b491STobias Grosser void GPUNodeBuilder::createKernelCopy(ppcg_kernel_stmt *KernelStmt) { 757b513b491STobias Grosser isl_ast_expr *LocalIndex = isl_ast_expr_copy(KernelStmt->u.c.local_index); 758b513b491STobias Grosser LocalIndex = isl_ast_expr_address_of(LocalIndex); 759b513b491STobias Grosser Value *LocalAddr = ExprBuilder.create(LocalIndex); 760b513b491STobias Grosser isl_ast_expr *Index = isl_ast_expr_copy(KernelStmt->u.c.index); 761b513b491STobias Grosser Index = isl_ast_expr_address_of(Index); 762b513b491STobias Grosser Value *GlobalAddr = ExprBuilder.create(Index); 763b513b491STobias Grosser 764b513b491STobias Grosser if (KernelStmt->u.c.read) { 765b513b491STobias Grosser LoadInst *Load = Builder.CreateLoad(GlobalAddr, "shared.read"); 766b513b491STobias Grosser Builder.CreateStore(Load, LocalAddr); 767b513b491STobias Grosser } else { 768b513b491STobias Grosser LoadInst *Load = Builder.CreateLoad(LocalAddr, "shared.write"); 769b513b491STobias Grosser Builder.CreateStore(Load, GlobalAddr); 770b513b491STobias Grosser } 771b513b491STobias Grosser } 7725260c041STobias Grosser 773edb885cbSTobias Grosser void GPUNodeBuilder::createScopStmt(isl_ast_expr *Expr, 774edb885cbSTobias Grosser ppcg_kernel_stmt *KernelStmt) { 775edb885cbSTobias Grosser auto Stmt = (ScopStmt *)KernelStmt->u.d.stmt->stmt; 776edb885cbSTobias Grosser isl_id_to_ast_expr *Indexes = KernelStmt->u.d.ref2expr; 777edb885cbSTobias Grosser 778edb885cbSTobias Grosser LoopToScevMapT LTS; 779edb885cbSTobias Grosser LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 780edb885cbSTobias Grosser 781edb885cbSTobias Grosser createSubstitutions(Expr, Stmt, LTS); 782edb885cbSTobias Grosser 783edb885cbSTobias Grosser if (Stmt->isBlockStmt()) 784edb885cbSTobias Grosser BlockGen.copyStmt(*Stmt, LTS, Indexes); 785edb885cbSTobias Grosser else 786edb885cbSTobias Grosser assert(0 && "Region statement not supported\n"); 787edb885cbSTobias Grosser } 788edb885cbSTobias Grosser 7895260c041STobias Grosser void GPUNodeBuilder::createKernelSync() { 7905260c041STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 7915260c041STobias Grosser auto *Sync = Intrinsic::getDeclaration(M, Intrinsic::nvvm_barrier0); 7925260c041STobias Grosser Builder.CreateCall(Sync, {}); 7935260c041STobias Grosser } 7945260c041STobias Grosser 795edb885cbSTobias Grosser /// Collect llvm::Values referenced from @p Node 796edb885cbSTobias Grosser /// 797edb885cbSTobias Grosser /// This function only applies to isl_ast_nodes that are user_nodes referring 798edb885cbSTobias Grosser /// to a ScopStmt. All other node types are ignore. 799edb885cbSTobias Grosser /// 800edb885cbSTobias Grosser /// @param Node The node to collect references for. 801edb885cbSTobias Grosser /// @param User A user pointer used as storage for the data that is collected. 802edb885cbSTobias Grosser /// 803edb885cbSTobias Grosser /// @returns isl_bool_true if data could be collected successfully. 804edb885cbSTobias Grosser isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) { 805edb885cbSTobias Grosser if (isl_ast_node_get_type(Node) != isl_ast_node_user) 806edb885cbSTobias Grosser return isl_bool_true; 807edb885cbSTobias Grosser 808edb885cbSTobias Grosser isl_ast_expr *Expr = isl_ast_node_user_get_expr(Node); 809edb885cbSTobias Grosser isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 810edb885cbSTobias Grosser isl_id *Id = isl_ast_expr_get_id(StmtExpr); 811edb885cbSTobias Grosser const char *Str = isl_id_get_name(Id); 812edb885cbSTobias Grosser isl_id_free(Id); 813edb885cbSTobias Grosser isl_ast_expr_free(StmtExpr); 814edb885cbSTobias Grosser isl_ast_expr_free(Expr); 815edb885cbSTobias Grosser 816edb885cbSTobias Grosser if (!isPrefix(Str, "Stmt")) 817edb885cbSTobias Grosser return isl_bool_true; 818edb885cbSTobias Grosser 819edb885cbSTobias Grosser Id = isl_ast_node_get_annotation(Node); 820edb885cbSTobias Grosser auto *KernelStmt = (ppcg_kernel_stmt *)isl_id_get_user(Id); 821edb885cbSTobias Grosser auto Stmt = (ScopStmt *)KernelStmt->u.d.stmt->stmt; 822edb885cbSTobias Grosser isl_id_free(Id); 823edb885cbSTobias Grosser 82400bb5a99STobias Grosser addReferencesFromStmt(Stmt, User, false /* CreateScalarRefs */); 825edb885cbSTobias Grosser 826edb885cbSTobias Grosser return isl_bool_true; 827edb885cbSTobias Grosser } 828edb885cbSTobias Grosser 829edb885cbSTobias Grosser SetVector<Value *> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { 830edb885cbSTobias Grosser SetVector<Value *> SubtreeValues; 831edb885cbSTobias Grosser SetVector<const SCEV *> SCEVs; 832edb885cbSTobias Grosser SetVector<const Loop *> Loops; 833edb885cbSTobias Grosser SubtreeReferences References = { 834edb885cbSTobias Grosser LI, SE, S, ValueMap, SubtreeValues, SCEVs, getBlockGenerator()}; 835edb885cbSTobias Grosser 836edb885cbSTobias Grosser for (const auto &I : IDToValue) 837edb885cbSTobias Grosser SubtreeValues.insert(I.second); 838edb885cbSTobias Grosser 839edb885cbSTobias Grosser isl_ast_node_foreach_descendant_top_down( 840edb885cbSTobias Grosser Kernel->tree, collectReferencesInGPUStmt, &References); 841edb885cbSTobias Grosser 842edb885cbSTobias Grosser for (const SCEV *Expr : SCEVs) 843edb885cbSTobias Grosser findValues(Expr, SE, SubtreeValues); 844edb885cbSTobias Grosser 845edb885cbSTobias Grosser for (auto &SAI : S.arrays()) 846d7754a12SRoman Gareev SubtreeValues.remove(SAI->getBasePtr()); 847edb885cbSTobias Grosser 848edb885cbSTobias Grosser isl_space *Space = S.getParamSpace(); 849edb885cbSTobias Grosser for (long i = 0; i < isl_space_dim(Space, isl_dim_param); i++) { 850edb885cbSTobias Grosser isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, i); 851edb885cbSTobias Grosser assert(IDToValue.count(Id)); 852edb885cbSTobias Grosser Value *Val = IDToValue[Id]; 853edb885cbSTobias Grosser SubtreeValues.remove(Val); 854edb885cbSTobias Grosser isl_id_free(Id); 855edb885cbSTobias Grosser } 856edb885cbSTobias Grosser isl_space_free(Space); 857edb885cbSTobias Grosser 858edb885cbSTobias Grosser for (long i = 0; i < isl_space_dim(Kernel->space, isl_dim_set); i++) { 859edb885cbSTobias Grosser isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i); 860edb885cbSTobias Grosser assert(IDToValue.count(Id)); 861edb885cbSTobias Grosser Value *Val = IDToValue[Id]; 862edb885cbSTobias Grosser SubtreeValues.remove(Val); 863edb885cbSTobias Grosser isl_id_free(Id); 864edb885cbSTobias Grosser } 865edb885cbSTobias Grosser 866edb885cbSTobias Grosser return SubtreeValues; 867edb885cbSTobias Grosser } 868edb885cbSTobias Grosser 86974dc3cb4STobias Grosser void GPUNodeBuilder::clearDominators(Function *F) { 87074dc3cb4STobias Grosser DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 87174dc3cb4STobias Grosser std::vector<BasicBlock *> Nodes; 87274dc3cb4STobias Grosser for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) 87374dc3cb4STobias Grosser Nodes.push_back(I->getBlock()); 87474dc3cb4STobias Grosser 87574dc3cb4STobias Grosser for (BasicBlock *BB : Nodes) 87674dc3cb4STobias Grosser DT.eraseNode(BB); 87774dc3cb4STobias Grosser } 87874dc3cb4STobias Grosser 87974dc3cb4STobias Grosser void GPUNodeBuilder::clearScalarEvolution(Function *F) { 88074dc3cb4STobias Grosser for (BasicBlock &BB : *F) { 88174dc3cb4STobias Grosser Loop *L = LI.getLoopFor(&BB); 88274dc3cb4STobias Grosser if (L) 88374dc3cb4STobias Grosser SE.forgetLoop(L); 88474dc3cb4STobias Grosser } 88574dc3cb4STobias Grosser } 88674dc3cb4STobias Grosser 88774dc3cb4STobias Grosser void GPUNodeBuilder::clearLoops(Function *F) { 88874dc3cb4STobias Grosser for (BasicBlock &BB : *F) { 88974dc3cb4STobias Grosser Loop *L = LI.getLoopFor(&BB); 89074dc3cb4STobias Grosser if (L) 89174dc3cb4STobias Grosser SE.forgetLoop(L); 89274dc3cb4STobias Grosser LI.removeBlock(&BB); 89374dc3cb4STobias Grosser } 89474dc3cb4STobias Grosser } 89574dc3cb4STobias Grosser 89679a947c2STobias Grosser std::tuple<Value *, Value *> GPUNodeBuilder::getGridSizes(ppcg_kernel *Kernel) { 89779a947c2STobias Grosser std::vector<Value *> Sizes; 89879a947c2STobias Grosser isl_ast_build *Context = isl_ast_build_from_context(S.getContext()); 89979a947c2STobias Grosser 90079a947c2STobias Grosser for (long i = 0; i < Kernel->n_grid; i++) { 90179a947c2STobias Grosser isl_pw_aff *Size = isl_multi_pw_aff_get_pw_aff(Kernel->grid_size, i); 90279a947c2STobias Grosser isl_ast_expr *GridSize = isl_ast_build_expr_from_pw_aff(Context, Size); 90379a947c2STobias Grosser Value *Res = ExprBuilder.create(GridSize); 90479a947c2STobias Grosser Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); 90579a947c2STobias Grosser Sizes.push_back(Res); 90679a947c2STobias Grosser } 90779a947c2STobias Grosser isl_ast_build_free(Context); 90879a947c2STobias Grosser 90979a947c2STobias Grosser for (long i = Kernel->n_grid; i < 3; i++) 91079a947c2STobias Grosser Sizes.push_back(ConstantInt::get(Builder.getInt32Ty(), 1)); 91179a947c2STobias Grosser 91279a947c2STobias Grosser return std::make_tuple(Sizes[0], Sizes[1]); 91379a947c2STobias Grosser } 91479a947c2STobias Grosser 91579a947c2STobias Grosser std::tuple<Value *, Value *, Value *> 91679a947c2STobias Grosser GPUNodeBuilder::getBlockSizes(ppcg_kernel *Kernel) { 91779a947c2STobias Grosser std::vector<Value *> Sizes; 91879a947c2STobias Grosser 91979a947c2STobias Grosser for (long i = 0; i < Kernel->n_block; i++) { 92079a947c2STobias Grosser Value *Res = ConstantInt::get(Builder.getInt32Ty(), Kernel->block_dim[i]); 92179a947c2STobias Grosser Sizes.push_back(Res); 92279a947c2STobias Grosser } 92379a947c2STobias Grosser 92479a947c2STobias Grosser for (long i = Kernel->n_block; i < 3; i++) 92579a947c2STobias Grosser Sizes.push_back(ConstantInt::get(Builder.getInt32Ty(), 1)); 92679a947c2STobias Grosser 92779a947c2STobias Grosser return std::make_tuple(Sizes[0], Sizes[1], Sizes[2]); 92879a947c2STobias Grosser } 92979a947c2STobias Grosser 93057693272STobias Grosser Value * 93157693272STobias Grosser GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F, 93257693272STobias Grosser SetVector<Value *> SubtreeValues) { 9334e18d71cSTobias Grosser Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 9344e18d71cSTobias Grosser std::distance(F->arg_begin(), F->arg_end())); 93579a947c2STobias Grosser 93679a947c2STobias Grosser BasicBlock *EntryBlock = 93779a947c2STobias Grosser &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 93879a947c2STobias Grosser std::string Launch = "polly_launch_" + std::to_string(Kernel->id); 93979a947c2STobias Grosser Instruction *Parameters = 94079a947c2STobias Grosser new AllocaInst(ArrayTy, Launch + "_params", EntryBlock->getTerminator()); 94179a947c2STobias Grosser 94279a947c2STobias Grosser int Index = 0; 94379a947c2STobias Grosser for (long i = 0; i < Prog->n_array; i++) { 94479a947c2STobias Grosser if (!ppcg_kernel_requires_array_argument(Kernel, i)) 94579a947c2STobias Grosser continue; 94679a947c2STobias Grosser 94779a947c2STobias Grosser isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set); 94879a947c2STobias Grosser const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id); 94979a947c2STobias Grosser 95079a947c2STobias Grosser Value *DevArray = DeviceAllocations[(ScopArrayInfo *)SAI]; 95179a947c2STobias Grosser DevArray = createCallGetDevicePtr(DevArray); 95279a947c2STobias Grosser Instruction *Param = new AllocaInst( 95379a947c2STobias Grosser Builder.getInt8PtrTy(), Launch + "_param_" + std::to_string(Index), 95479a947c2STobias Grosser EntryBlock->getTerminator()); 95579a947c2STobias Grosser Builder.CreateStore(DevArray, Param); 95644143bb9STobias Grosser Value *Slot = Builder.CreateGEP( 95744143bb9STobias Grosser Parameters, {Builder.getInt64(0), Builder.getInt64(Index)}); 95879a947c2STobias Grosser Value *ParamTyped = 95979a947c2STobias Grosser Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); 96079a947c2STobias Grosser Builder.CreateStore(ParamTyped, Slot); 96179a947c2STobias Grosser Index++; 96279a947c2STobias Grosser } 96379a947c2STobias Grosser 964a490147cSTobias Grosser int NumHostIters = isl_space_dim(Kernel->space, isl_dim_set); 965a490147cSTobias Grosser 966a490147cSTobias Grosser for (long i = 0; i < NumHostIters; i++) { 967a490147cSTobias Grosser isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i); 968a490147cSTobias Grosser Value *Val = IDToValue[Id]; 969a490147cSTobias Grosser isl_id_free(Id); 970a490147cSTobias Grosser Instruction *Param = new AllocaInst( 971a490147cSTobias Grosser Val->getType(), Launch + "_param_" + std::to_string(Index), 972a490147cSTobias Grosser EntryBlock->getTerminator()); 973a490147cSTobias Grosser Builder.CreateStore(Val, Param); 974a490147cSTobias Grosser Value *Slot = Builder.CreateGEP( 975a490147cSTobias Grosser Parameters, {Builder.getInt64(0), Builder.getInt64(Index)}); 976a490147cSTobias Grosser Value *ParamTyped = 977a490147cSTobias Grosser Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); 978a490147cSTobias Grosser Builder.CreateStore(ParamTyped, Slot); 979a490147cSTobias Grosser Index++; 980a490147cSTobias Grosser } 981a490147cSTobias Grosser 982d8b94bcaSTobias Grosser int NumVars = isl_space_dim(Kernel->space, isl_dim_param); 983d8b94bcaSTobias Grosser 984d8b94bcaSTobias Grosser for (long i = 0; i < NumVars; i++) { 985d8b94bcaSTobias Grosser isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_param, i); 986d8b94bcaSTobias Grosser Value *Val = IDToValue[Id]; 987d8b94bcaSTobias Grosser isl_id_free(Id); 988d8b94bcaSTobias Grosser Instruction *Param = new AllocaInst( 989d8b94bcaSTobias Grosser Val->getType(), Launch + "_param_" + std::to_string(Index), 990d8b94bcaSTobias Grosser EntryBlock->getTerminator()); 991d8b94bcaSTobias Grosser Builder.CreateStore(Val, Param); 992d8b94bcaSTobias Grosser Value *Slot = Builder.CreateGEP( 993d8b94bcaSTobias Grosser Parameters, {Builder.getInt64(0), Builder.getInt64(Index)}); 994d8b94bcaSTobias Grosser Value *ParamTyped = 995d8b94bcaSTobias Grosser Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); 996d8b94bcaSTobias Grosser Builder.CreateStore(ParamTyped, Slot); 997d8b94bcaSTobias Grosser Index++; 998d8b94bcaSTobias Grosser } 999d8b94bcaSTobias Grosser 100057693272STobias Grosser for (auto Val : SubtreeValues) { 100157693272STobias Grosser Instruction *Param = new AllocaInst( 100257693272STobias Grosser Val->getType(), Launch + "_param_" + std::to_string(Index), 100357693272STobias Grosser EntryBlock->getTerminator()); 100457693272STobias Grosser Builder.CreateStore(Val, Param); 100557693272STobias Grosser Value *Slot = Builder.CreateGEP( 100657693272STobias Grosser Parameters, {Builder.getInt64(0), Builder.getInt64(Index)}); 100757693272STobias Grosser Value *ParamTyped = 100857693272STobias Grosser Builder.CreatePointerCast(Param, Builder.getInt8PtrTy()); 100957693272STobias Grosser Builder.CreateStore(ParamTyped, Slot); 101057693272STobias Grosser Index++; 101157693272STobias Grosser } 101257693272STobias Grosser 101379a947c2STobias Grosser auto Location = EntryBlock->getTerminator(); 101479a947c2STobias Grosser return new BitCastInst(Parameters, Builder.getInt8PtrTy(), 101579a947c2STobias Grosser Launch + "_params_i8ptr", Location); 101679a947c2STobias Grosser } 101779a947c2STobias Grosser 101832837fe3STobias Grosser void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { 101932837fe3STobias Grosser isl_id *Id = isl_ast_node_get_annotation(KernelStmt); 102032837fe3STobias Grosser ppcg_kernel *Kernel = (ppcg_kernel *)isl_id_get_user(Id); 102132837fe3STobias Grosser isl_id_free(Id); 102232837fe3STobias Grosser isl_ast_node_free(KernelStmt); 102332837fe3STobias Grosser 1024edb885cbSTobias Grosser SetVector<Value *> SubtreeValues = getReferencesInKernel(Kernel); 1025edb885cbSTobias Grosser 102632837fe3STobias Grosser assert(Kernel->tree && "Device AST of kernel node is empty"); 102732837fe3STobias Grosser 102832837fe3STobias Grosser Instruction &HostInsertPoint = *Builder.GetInsertPoint(); 1029472f9654STobias Grosser IslExprBuilder::IDToValueTy HostIDs = IDToValue; 1030edb885cbSTobias Grosser ValueMapT HostValueMap = ValueMap; 103132837fe3STobias Grosser 1032edb885cbSTobias Grosser SetVector<const Loop *> Loops; 1033edb885cbSTobias Grosser 1034edb885cbSTobias Grosser // Create for all loops we depend on values that contain the current loop 1035edb885cbSTobias Grosser // iteration. These values are necessary to generate code for SCEVs that 1036edb885cbSTobias Grosser // depend on such loops. As a result we need to pass them to the subfunction. 1037edb885cbSTobias Grosser for (const Loop *L : Loops) { 1038edb885cbSTobias Grosser const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 1039edb885cbSTobias Grosser SE.getUnknown(Builder.getInt64(1)), 1040edb885cbSTobias Grosser L, SCEV::FlagAnyWrap); 1041edb885cbSTobias Grosser Value *V = generateSCEV(OuterLIV); 1042edb885cbSTobias Grosser OutsideLoopIterations[L] = SE.getUnknown(V); 1043edb885cbSTobias Grosser SubtreeValues.insert(V); 1044edb885cbSTobias Grosser } 1045edb885cbSTobias Grosser 1046edb885cbSTobias Grosser createKernelFunction(Kernel, SubtreeValues); 104732837fe3STobias Grosser 104859ab0705STobias Grosser create(isl_ast_node_copy(Kernel->tree)); 104959ab0705STobias Grosser 105074dc3cb4STobias Grosser Function *F = Builder.GetInsertBlock()->getParent(); 105174dc3cb4STobias Grosser clearDominators(F); 105274dc3cb4STobias Grosser clearScalarEvolution(F); 105374dc3cb4STobias Grosser clearLoops(F); 105474dc3cb4STobias Grosser 105532837fe3STobias Grosser Builder.SetInsertPoint(&HostInsertPoint); 1056472f9654STobias Grosser IDToValue = HostIDs; 105732837fe3STobias Grosser 1058edb885cbSTobias Grosser ValueMap = HostValueMap; 1059edb885cbSTobias Grosser ScalarMap.clear(); 1060edb885cbSTobias Grosser PHIOpMap.clear(); 1061edb885cbSTobias Grosser EscapeMap.clear(); 1062edb885cbSTobias Grosser IDToSAI.clear(); 106374dc3cb4STobias Grosser Annotator.resetAlternativeAliasBases(); 106474dc3cb4STobias Grosser for (auto &BasePtr : LocalArrays) 106574dc3cb4STobias Grosser S.invalidateScopArrayInfo(BasePtr, ScopArrayInfo::MK_Array); 106674dc3cb4STobias Grosser LocalArrays.clear(); 1067edb885cbSTobias Grosser 106857693272STobias Grosser Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues); 106979a947c2STobias Grosser 107057793596STobias Grosser std::string ASMString = finalizeKernelFunction(); 107157793596STobias Grosser std::string Name = "kernel_" + std::to_string(Kernel->id); 107257793596STobias Grosser Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name); 107357793596STobias Grosser Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name"); 107457793596STobias Grosser Value *GPUKernel = createCallGetKernel(KernelString, NameString); 107579a947c2STobias Grosser 107679a947c2STobias Grosser Value *GridDimX, *GridDimY; 107779a947c2STobias Grosser std::tie(GridDimX, GridDimY) = getGridSizes(Kernel); 107879a947c2STobias Grosser 107979a947c2STobias Grosser Value *BlockDimX, *BlockDimY, *BlockDimZ; 108079a947c2STobias Grosser std::tie(BlockDimX, BlockDimY, BlockDimZ) = getBlockSizes(Kernel); 108179a947c2STobias Grosser 108279a947c2STobias Grosser createCallLaunchKernel(GPUKernel, GridDimX, GridDimY, BlockDimX, BlockDimY, 108379a947c2STobias Grosser BlockDimZ, Parameters); 108457793596STobias Grosser createCallFreeKernel(GPUKernel); 1085b513b491STobias Grosser 1086b513b491STobias Grosser for (auto Id : KernelIds) 1087b513b491STobias Grosser isl_id_free(Id); 1088b513b491STobias Grosser 1089b513b491STobias Grosser KernelIds.clear(); 109032837fe3STobias Grosser } 109132837fe3STobias Grosser 109232837fe3STobias Grosser /// Compute the DataLayout string for the NVPTX backend. 109332837fe3STobias Grosser /// 109432837fe3STobias Grosser /// @param is64Bit Are we looking for a 64 bit architecture? 109532837fe3STobias Grosser static std::string computeNVPTXDataLayout(bool is64Bit) { 109632837fe3STobias Grosser std::string Ret = "e"; 109732837fe3STobias Grosser 109832837fe3STobias Grosser if (!is64Bit) 109932837fe3STobias Grosser Ret += "-p:32:32"; 110032837fe3STobias Grosser 110132837fe3STobias Grosser Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 110232837fe3STobias Grosser 110332837fe3STobias Grosser return Ret; 110432837fe3STobias Grosser } 110532837fe3STobias Grosser 1106edb885cbSTobias Grosser Function * 1107edb885cbSTobias Grosser GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, 1108edb885cbSTobias Grosser SetVector<Value *> &SubtreeValues) { 110932837fe3STobias Grosser std::vector<Type *> Args; 111032837fe3STobias Grosser std::string Identifier = "kernel_" + std::to_string(Kernel->id); 111132837fe3STobias Grosser 111232837fe3STobias Grosser for (long i = 0; i < Prog->n_array; i++) { 111332837fe3STobias Grosser if (!ppcg_kernel_requires_array_argument(Kernel, i)) 111432837fe3STobias Grosser continue; 111532837fe3STobias Grosser 111632837fe3STobias Grosser Args.push_back(Builder.getInt8PtrTy()); 111732837fe3STobias Grosser } 111832837fe3STobias Grosser 1119f6044bd0STobias Grosser int NumHostIters = isl_space_dim(Kernel->space, isl_dim_set); 1120f6044bd0STobias Grosser 1121f6044bd0STobias Grosser for (long i = 0; i < NumHostIters; i++) 1122f6044bd0STobias Grosser Args.push_back(Builder.getInt64Ty()); 1123f6044bd0STobias Grosser 1124c84a1995STobias Grosser int NumVars = isl_space_dim(Kernel->space, isl_dim_param); 1125c84a1995STobias Grosser 1126c84a1995STobias Grosser for (long i = 0; i < NumVars; i++) 1127c84a1995STobias Grosser Args.push_back(Builder.getInt64Ty()); 1128c84a1995STobias Grosser 1129edb885cbSTobias Grosser for (auto *V : SubtreeValues) 1130edb885cbSTobias Grosser Args.push_back(V->getType()); 1131edb885cbSTobias Grosser 113232837fe3STobias Grosser auto *FT = FunctionType::get(Builder.getVoidTy(), Args, false); 113332837fe3STobias Grosser auto *FN = Function::Create(FT, Function::ExternalLinkage, Identifier, 113432837fe3STobias Grosser GPUModule.get()); 113532837fe3STobias Grosser FN->setCallingConv(CallingConv::PTX_Kernel); 113632837fe3STobias Grosser 113732837fe3STobias Grosser auto Arg = FN->arg_begin(); 113832837fe3STobias Grosser for (long i = 0; i < Kernel->n_array; i++) { 113932837fe3STobias Grosser if (!ppcg_kernel_requires_array_argument(Kernel, i)) 114032837fe3STobias Grosser continue; 114132837fe3STobias Grosser 1142edb885cbSTobias Grosser Arg->setName(Kernel->array[i].array->name); 1143edb885cbSTobias Grosser 1144edb885cbSTobias Grosser isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set); 1145edb885cbSTobias Grosser const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl_id_copy(Id)); 1146edb885cbSTobias Grosser Type *EleTy = SAI->getElementType(); 1147edb885cbSTobias Grosser Value *Val = &*Arg; 1148edb885cbSTobias Grosser SmallVector<const SCEV *, 4> Sizes; 1149edb885cbSTobias Grosser isl_ast_build *Build = 1150edb885cbSTobias Grosser isl_ast_build_from_context(isl_set_copy(Prog->context)); 1151edb885cbSTobias Grosser for (long j = 1; j < Kernel->array[i].array->n_index; j++) { 1152edb885cbSTobias Grosser isl_ast_expr *DimSize = isl_ast_build_expr_from_pw_aff( 1153edb885cbSTobias Grosser Build, isl_pw_aff_copy(Kernel->array[i].array->bound[j])); 1154edb885cbSTobias Grosser auto V = ExprBuilder.create(DimSize); 1155edb885cbSTobias Grosser Sizes.push_back(SE.getSCEV(V)); 1156edb885cbSTobias Grosser } 1157edb885cbSTobias Grosser const ScopArrayInfo *SAIRep = 1158edb885cbSTobias Grosser S.getOrCreateScopArrayInfo(Val, EleTy, Sizes, ScopArrayInfo::MK_Array); 115974dc3cb4STobias Grosser LocalArrays.push_back(Val); 1160edb885cbSTobias Grosser 1161edb885cbSTobias Grosser isl_ast_build_free(Build); 1162b513b491STobias Grosser KernelIds.push_back(Id); 1163edb885cbSTobias Grosser IDToSAI[Id] = SAIRep; 116432837fe3STobias Grosser Arg++; 116532837fe3STobias Grosser } 116632837fe3STobias Grosser 1167f6044bd0STobias Grosser for (long i = 0; i < NumHostIters; i++) { 1168f6044bd0STobias Grosser isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i); 1169f6044bd0STobias Grosser Arg->setName(isl_id_get_name(Id)); 1170f6044bd0STobias Grosser IDToValue[Id] = &*Arg; 1171f6044bd0STobias Grosser KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); 1172f6044bd0STobias Grosser Arg++; 1173f6044bd0STobias Grosser } 1174f6044bd0STobias Grosser 1175c84a1995STobias Grosser for (long i = 0; i < NumVars; i++) { 1176c84a1995STobias Grosser isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_param, i); 1177c84a1995STobias Grosser Arg->setName(isl_id_get_name(Id)); 1178c84a1995STobias Grosser IDToValue[Id] = &*Arg; 1179c84a1995STobias Grosser KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); 1180c84a1995STobias Grosser Arg++; 1181c84a1995STobias Grosser } 1182c84a1995STobias Grosser 1183edb885cbSTobias Grosser for (auto *V : SubtreeValues) { 1184edb885cbSTobias Grosser Arg->setName(V->getName()); 1185edb885cbSTobias Grosser ValueMap[V] = &*Arg; 1186edb885cbSTobias Grosser Arg++; 1187edb885cbSTobias Grosser } 1188edb885cbSTobias Grosser 118932837fe3STobias Grosser return FN; 119032837fe3STobias Grosser } 119132837fe3STobias Grosser 1192472f9654STobias Grosser void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) { 1193472f9654STobias Grosser Intrinsic::ID IntrinsicsBID[] = {Intrinsic::nvvm_read_ptx_sreg_ctaid_x, 1194472f9654STobias Grosser Intrinsic::nvvm_read_ptx_sreg_ctaid_y}; 1195472f9654STobias Grosser 1196472f9654STobias Grosser Intrinsic::ID IntrinsicsTID[] = {Intrinsic::nvvm_read_ptx_sreg_tid_x, 1197472f9654STobias Grosser Intrinsic::nvvm_read_ptx_sreg_tid_y, 1198472f9654STobias Grosser Intrinsic::nvvm_read_ptx_sreg_tid_z}; 1199472f9654STobias Grosser 1200472f9654STobias Grosser auto addId = [this](__isl_take isl_id *Id, Intrinsic::ID Intr) mutable { 1201472f9654STobias Grosser std::string Name = isl_id_get_name(Id); 1202472f9654STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 1203472f9654STobias Grosser Function *IntrinsicFn = Intrinsic::getDeclaration(M, Intr); 1204472f9654STobias Grosser Value *Val = Builder.CreateCall(IntrinsicFn, {}); 1205472f9654STobias Grosser Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name); 1206472f9654STobias Grosser IDToValue[Id] = Val; 1207472f9654STobias Grosser KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); 1208472f9654STobias Grosser }; 1209472f9654STobias Grosser 1210472f9654STobias Grosser for (int i = 0; i < Kernel->n_grid; ++i) { 1211472f9654STobias Grosser isl_id *Id = isl_id_list_get_id(Kernel->block_ids, i); 1212472f9654STobias Grosser addId(Id, IntrinsicsBID[i]); 1213472f9654STobias Grosser } 1214472f9654STobias Grosser 1215472f9654STobias Grosser for (int i = 0; i < Kernel->n_block; ++i) { 1216472f9654STobias Grosser isl_id *Id = isl_id_list_get_id(Kernel->thread_ids, i); 1217472f9654STobias Grosser addId(Id, IntrinsicsTID[i]); 1218472f9654STobias Grosser } 1219472f9654STobias Grosser } 1220472f9654STobias Grosser 122100bb5a99STobias Grosser void GPUNodeBuilder::prepareKernelArguments(ppcg_kernel *Kernel, Function *FN) { 122200bb5a99STobias Grosser auto Arg = FN->arg_begin(); 122300bb5a99STobias Grosser for (long i = 0; i < Kernel->n_array; i++) { 122400bb5a99STobias Grosser if (!ppcg_kernel_requires_array_argument(Kernel, i)) 122500bb5a99STobias Grosser continue; 122600bb5a99STobias Grosser 122700bb5a99STobias Grosser isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set); 122800bb5a99STobias Grosser const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl_id_copy(Id)); 122900bb5a99STobias Grosser isl_id_free(Id); 123000bb5a99STobias Grosser 123100bb5a99STobias Grosser if (SAI->getNumberOfDimensions() > 0) { 123200bb5a99STobias Grosser Arg++; 123300bb5a99STobias Grosser continue; 123400bb5a99STobias Grosser } 123500bb5a99STobias Grosser 123600bb5a99STobias Grosser Value *Alloca = BlockGen.getOrCreateScalarAlloca(SAI->getBasePtr()); 123700bb5a99STobias Grosser Value *ArgPtr = &*Arg; 123800bb5a99STobias Grosser Type *TypePtr = SAI->getElementType()->getPointerTo(); 123900bb5a99STobias Grosser Value *TypedArgPtr = Builder.CreatePointerCast(ArgPtr, TypePtr); 124000bb5a99STobias Grosser Value *Val = Builder.CreateLoad(TypedArgPtr); 124100bb5a99STobias Grosser Builder.CreateStore(Val, Alloca); 124200bb5a99STobias Grosser 124300bb5a99STobias Grosser Arg++; 124400bb5a99STobias Grosser } 124500bb5a99STobias Grosser } 124600bb5a99STobias Grosser 1247b513b491STobias Grosser void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) { 1248b513b491STobias Grosser Module *M = Builder.GetInsertBlock()->getParent()->getParent(); 1249b513b491STobias Grosser 1250b513b491STobias Grosser for (int i = 0; i < Kernel->n_var; ++i) { 1251b513b491STobias Grosser struct ppcg_kernel_var &Var = Kernel->var[i]; 1252b513b491STobias Grosser isl_id *Id = isl_space_get_tuple_id(Var.array->space, isl_dim_set); 1253b513b491STobias Grosser Type *EleTy = ScopArrayInfo::getFromId(Id)->getElementType(); 1254b513b491STobias Grosser 1255b513b491STobias Grosser SmallVector<const SCEV *, 4> Sizes; 1256b513b491STobias Grosser isl_val *V0 = isl_vec_get_element_val(Var.size, 0); 1257b513b491STobias Grosser long Bound = isl_val_get_num_si(V0); 1258b513b491STobias Grosser isl_val_free(V0); 1259b513b491STobias Grosser Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); 1260b513b491STobias Grosser 1261b513b491STobias Grosser ArrayType *ArrayTy = ArrayType::get(EleTy, Bound); 1262b513b491STobias Grosser for (unsigned int j = 1; j < Var.array->n_index; ++j) { 1263b513b491STobias Grosser isl_val *Val = isl_vec_get_element_val(Var.size, j); 1264b513b491STobias Grosser Bound = isl_val_get_num_si(Val); 1265b513b491STobias Grosser isl_val_free(Val); 1266b513b491STobias Grosser Sizes.push_back(S.getSE()->getConstant(Builder.getInt64Ty(), Bound)); 1267b513b491STobias Grosser ArrayTy = ArrayType::get(ArrayTy, Bound); 1268b513b491STobias Grosser } 1269b513b491STobias Grosser 1270*130ca30fSTobias Grosser const ScopArrayInfo *SAI; 1271*130ca30fSTobias Grosser Value *Allocation; 1272*130ca30fSTobias Grosser if (Var.type == ppcg_access_shared) { 1273*130ca30fSTobias Grosser auto GlobalVar = new GlobalVariable( 1274*130ca30fSTobias Grosser *M, ArrayTy, false, GlobalValue::InternalLinkage, 0, Var.name, 1275*130ca30fSTobias Grosser nullptr, GlobalValue::ThreadLocalMode::NotThreadLocal, 3); 1276*130ca30fSTobias Grosser GlobalVar->setAlignment(EleTy->getPrimitiveSizeInBits() / 8); 1277b513b491STobias Grosser ConstantAggregateZero *Zero = ConstantAggregateZero::get(ArrayTy); 1278*130ca30fSTobias Grosser GlobalVar->setInitializer(Zero); 1279*130ca30fSTobias Grosser Allocation = GlobalVar; 1280*130ca30fSTobias Grosser } else if (Var.type == ppcg_access_private) { 1281*130ca30fSTobias Grosser Allocation = Builder.CreateAlloca(ArrayTy, 0, "private_array"); 1282*130ca30fSTobias Grosser } else { 1283*130ca30fSTobias Grosser llvm_unreachable("unknown variable type"); 1284*130ca30fSTobias Grosser } 1285*130ca30fSTobias Grosser Builder.GetInsertBlock()->dump(); 1286*130ca30fSTobias Grosser SAI = S.getOrCreateScopArrayInfo(Allocation, EleTy, Sizes, 1287*130ca30fSTobias Grosser ScopArrayInfo::MK_Array); 1288b513b491STobias Grosser Id = isl_id_alloc(S.getIslCtx(), Var.name, nullptr); 1289*130ca30fSTobias Grosser IDToValue[Id] = Allocation; 1290*130ca30fSTobias Grosser LocalArrays.push_back(Allocation); 1291b513b491STobias Grosser KernelIds.push_back(Id); 1292b513b491STobias Grosser IDToSAI[Id] = SAI; 1293b513b491STobias Grosser } 1294b513b491STobias Grosser } 1295b513b491STobias Grosser 1296edb885cbSTobias Grosser void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel, 1297edb885cbSTobias Grosser SetVector<Value *> &SubtreeValues) { 129832837fe3STobias Grosser 129932837fe3STobias Grosser std::string Identifier = "kernel_" + std::to_string(Kernel->id); 130032837fe3STobias Grosser GPUModule.reset(new Module(Identifier, Builder.getContext())); 130132837fe3STobias Grosser GPUModule->setTargetTriple(Triple::normalize("nvptx64-nvidia-cuda")); 130232837fe3STobias Grosser GPUModule->setDataLayout(computeNVPTXDataLayout(true /* is64Bit */)); 130332837fe3STobias Grosser 1304edb885cbSTobias Grosser Function *FN = createKernelFunctionDecl(Kernel, SubtreeValues); 130532837fe3STobias Grosser 130659ab0705STobias Grosser BasicBlock *PrevBlock = Builder.GetInsertBlock(); 130732837fe3STobias Grosser auto EntryBlock = BasicBlock::Create(Builder.getContext(), "entry", FN); 130832837fe3STobias Grosser 130959ab0705STobias Grosser DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 131059ab0705STobias Grosser DT.addNewBlock(EntryBlock, PrevBlock); 131159ab0705STobias Grosser 131232837fe3STobias Grosser Builder.SetInsertPoint(EntryBlock); 131332837fe3STobias Grosser Builder.CreateRetVoid(); 131432837fe3STobias Grosser Builder.SetInsertPoint(EntryBlock, EntryBlock->begin()); 1315472f9654STobias Grosser 1316629109b6STobias Grosser ScopDetection::markFunctionAsInvalid(FN); 1317629109b6STobias Grosser 131800bb5a99STobias Grosser prepareKernelArguments(Kernel, FN); 1319b513b491STobias Grosser createKernelVariables(Kernel, FN); 1320472f9654STobias Grosser insertKernelIntrinsics(Kernel); 132132837fe3STobias Grosser } 132232837fe3STobias Grosser 132374dc3cb4STobias Grosser std::string GPUNodeBuilder::createKernelASM() { 132474dc3cb4STobias Grosser llvm::Triple GPUTriple(Triple::normalize("nvptx64-nvidia-cuda")); 132574dc3cb4STobias Grosser std::string ErrMsg; 132674dc3cb4STobias Grosser auto GPUTarget = TargetRegistry::lookupTarget(GPUTriple.getTriple(), ErrMsg); 132774dc3cb4STobias Grosser 132874dc3cb4STobias Grosser if (!GPUTarget) { 132974dc3cb4STobias Grosser errs() << ErrMsg << "\n"; 133074dc3cb4STobias Grosser return ""; 133174dc3cb4STobias Grosser } 133274dc3cb4STobias Grosser 133374dc3cb4STobias Grosser TargetOptions Options; 133474dc3cb4STobias Grosser Options.UnsafeFPMath = FastMath; 133574dc3cb4STobias Grosser std::unique_ptr<TargetMachine> TargetM( 133674dc3cb4STobias Grosser GPUTarget->createTargetMachine(GPUTriple.getTriple(), CudaVersion, "", 133774dc3cb4STobias Grosser Options, Optional<Reloc::Model>())); 133874dc3cb4STobias Grosser 133974dc3cb4STobias Grosser SmallString<0> ASMString; 134074dc3cb4STobias Grosser raw_svector_ostream ASMStream(ASMString); 134174dc3cb4STobias Grosser llvm::legacy::PassManager PM; 134274dc3cb4STobias Grosser 134374dc3cb4STobias Grosser PM.add(createTargetTransformInfoWrapperPass(TargetM->getTargetIRAnalysis())); 134474dc3cb4STobias Grosser 134574dc3cb4STobias Grosser if (TargetM->addPassesToEmitFile( 134674dc3cb4STobias Grosser PM, ASMStream, TargetMachine::CGFT_AssemblyFile, true /* verify */)) { 134774dc3cb4STobias Grosser errs() << "The target does not support generation of this file type!\n"; 134874dc3cb4STobias Grosser return ""; 134974dc3cb4STobias Grosser } 135074dc3cb4STobias Grosser 135174dc3cb4STobias Grosser PM.run(*GPUModule); 135274dc3cb4STobias Grosser 135374dc3cb4STobias Grosser return ASMStream.str(); 135474dc3cb4STobias Grosser } 135574dc3cb4STobias Grosser 135657793596STobias Grosser std::string GPUNodeBuilder::finalizeKernelFunction() { 1357e1a98343STobias Grosser // Verify module. 1358e1a98343STobias Grosser llvm::legacy::PassManager Passes; 1359e1a98343STobias Grosser Passes.add(createVerifierPass()); 1360e1a98343STobias Grosser Passes.run(*GPUModule); 136132837fe3STobias Grosser 136232837fe3STobias Grosser if (DumpKernelIR) 136332837fe3STobias Grosser outs() << *GPUModule << "\n"; 136432837fe3STobias Grosser 13659a18d559STobias Grosser // Optimize module. 13669a18d559STobias Grosser llvm::legacy::PassManager OptPasses; 13679a18d559STobias Grosser PassManagerBuilder PassBuilder; 13689a18d559STobias Grosser PassBuilder.OptLevel = 3; 13699a18d559STobias Grosser PassBuilder.SizeLevel = 0; 13709a18d559STobias Grosser PassBuilder.populateModulePassManager(OptPasses); 13719a18d559STobias Grosser OptPasses.run(*GPUModule); 13729a18d559STobias Grosser 137374dc3cb4STobias Grosser std::string Assembly = createKernelASM(); 137474dc3cb4STobias Grosser 137574dc3cb4STobias Grosser if (DumpKernelASM) 137674dc3cb4STobias Grosser outs() << Assembly << "\n"; 137774dc3cb4STobias Grosser 137832837fe3STobias Grosser GPUModule.release(); 1379472f9654STobias Grosser KernelIDs.clear(); 138057793596STobias Grosser 138157793596STobias Grosser return Assembly; 138232837fe3STobias Grosser } 138332837fe3STobias Grosser 13849dfe4e7cSTobias Grosser namespace { 13859dfe4e7cSTobias Grosser class PPCGCodeGeneration : public ScopPass { 13869dfe4e7cSTobias Grosser public: 13879dfe4e7cSTobias Grosser static char ID; 13889dfe4e7cSTobias Grosser 1389e938517eSTobias Grosser /// The scop that is currently processed. 1390e938517eSTobias Grosser Scop *S; 1391e938517eSTobias Grosser 139238fc0aedSTobias Grosser LoopInfo *LI; 139338fc0aedSTobias Grosser DominatorTree *DT; 139438fc0aedSTobias Grosser ScalarEvolution *SE; 139538fc0aedSTobias Grosser const DataLayout *DL; 139638fc0aedSTobias Grosser RegionInfo *RI; 139738fc0aedSTobias Grosser 13989dfe4e7cSTobias Grosser PPCGCodeGeneration() : ScopPass(ID) {} 13999dfe4e7cSTobias Grosser 1400e938517eSTobias Grosser /// Construct compilation options for PPCG. 1401e938517eSTobias Grosser /// 1402e938517eSTobias Grosser /// @returns The compilation options. 1403e938517eSTobias Grosser ppcg_options *createPPCGOptions() { 1404e938517eSTobias Grosser auto DebugOptions = 1405e938517eSTobias Grosser (ppcg_debug_options *)malloc(sizeof(ppcg_debug_options)); 1406e938517eSTobias Grosser auto Options = (ppcg_options *)malloc(sizeof(ppcg_options)); 1407e938517eSTobias Grosser 1408e938517eSTobias Grosser DebugOptions->dump_schedule_constraints = false; 1409e938517eSTobias Grosser DebugOptions->dump_schedule = false; 1410e938517eSTobias Grosser DebugOptions->dump_final_schedule = false; 1411e938517eSTobias Grosser DebugOptions->dump_sizes = false; 1412e938517eSTobias Grosser 1413e938517eSTobias Grosser Options->debug = DebugOptions; 1414e938517eSTobias Grosser 1415e938517eSTobias Grosser Options->reschedule = true; 1416e938517eSTobias Grosser Options->scale_tile_loops = false; 1417e938517eSTobias Grosser Options->wrap = false; 1418e938517eSTobias Grosser 1419e938517eSTobias Grosser Options->non_negative_parameters = false; 1420e938517eSTobias Grosser Options->ctx = nullptr; 1421e938517eSTobias Grosser Options->sizes = nullptr; 1422e938517eSTobias Grosser 14234eaedde5STobias Grosser Options->tile_size = 32; 14244eaedde5STobias Grosser 1425*130ca30fSTobias Grosser Options->use_private_memory = PrivateMemory; 1426b513b491STobias Grosser Options->use_shared_memory = SharedMemory; 1427b513b491STobias Grosser Options->max_shared_memory = 48 * 1024; 1428e938517eSTobias Grosser 1429e938517eSTobias Grosser Options->target = PPCG_TARGET_CUDA; 1430e938517eSTobias Grosser Options->openmp = false; 1431e938517eSTobias Grosser Options->linearize_device_arrays = true; 1432e938517eSTobias Grosser Options->live_range_reordering = false; 1433e938517eSTobias Grosser 1434e938517eSTobias Grosser Options->opencl_compiler_options = nullptr; 1435e938517eSTobias Grosser Options->opencl_use_gpu = false; 1436e938517eSTobias Grosser Options->opencl_n_include_file = 0; 1437e938517eSTobias Grosser Options->opencl_include_files = nullptr; 1438e938517eSTobias Grosser Options->opencl_print_kernel_types = false; 1439e938517eSTobias Grosser Options->opencl_embed_kernel_code = false; 1440e938517eSTobias Grosser 1441e938517eSTobias Grosser Options->save_schedule_file = nullptr; 1442e938517eSTobias Grosser Options->load_schedule_file = nullptr; 1443e938517eSTobias Grosser 1444e938517eSTobias Grosser return Options; 1445e938517eSTobias Grosser } 1446e938517eSTobias Grosser 1447f384594dSTobias Grosser /// Get a tagged access relation containing all accesses of type @p AccessTy. 1448f384594dSTobias Grosser /// 1449f384594dSTobias Grosser /// Instead of a normal access of the form: 1450f384594dSTobias Grosser /// 1451f384594dSTobias Grosser /// Stmt[i,j,k] -> Array[f_0(i,j,k), f_1(i,j,k)] 1452f384594dSTobias Grosser /// 1453f384594dSTobias Grosser /// a tagged access has the form 1454f384594dSTobias Grosser /// 1455f384594dSTobias Grosser /// [Stmt[i,j,k] -> id[]] -> Array[f_0(i,j,k), f_1(i,j,k)] 1456f384594dSTobias Grosser /// 1457f384594dSTobias Grosser /// where 'id' is an additional space that references the memory access that 1458f384594dSTobias Grosser /// triggered the access. 1459f384594dSTobias Grosser /// 1460f384594dSTobias Grosser /// @param AccessTy The type of the memory accesses to collect. 1461f384594dSTobias Grosser /// 1462f384594dSTobias Grosser /// @return The relation describing all tagged memory accesses. 1463f384594dSTobias Grosser isl_union_map *getTaggedAccesses(enum MemoryAccess::AccessType AccessTy) { 1464f384594dSTobias Grosser isl_union_map *Accesses = isl_union_map_empty(S->getParamSpace()); 1465f384594dSTobias Grosser 1466f384594dSTobias Grosser for (auto &Stmt : *S) 1467f384594dSTobias Grosser for (auto &Acc : Stmt) 1468f384594dSTobias Grosser if (Acc->getType() == AccessTy) { 1469f384594dSTobias Grosser isl_map *Relation = Acc->getAccessRelation(); 1470f384594dSTobias Grosser Relation = isl_map_intersect_domain(Relation, Stmt.getDomain()); 1471f384594dSTobias Grosser 1472f384594dSTobias Grosser isl_space *Space = isl_map_get_space(Relation); 1473f384594dSTobias Grosser Space = isl_space_range(Space); 1474f384594dSTobias Grosser Space = isl_space_from_range(Space); 14756293ba69STobias Grosser Space = isl_space_set_tuple_id(Space, isl_dim_in, Acc->getId()); 1476f384594dSTobias Grosser isl_map *Universe = isl_map_universe(Space); 1477f384594dSTobias Grosser Relation = isl_map_domain_product(Relation, Universe); 1478f384594dSTobias Grosser Accesses = isl_union_map_add_map(Accesses, Relation); 1479f384594dSTobias Grosser } 1480f384594dSTobias Grosser 1481f384594dSTobias Grosser return Accesses; 1482f384594dSTobias Grosser } 1483f384594dSTobias Grosser 1484f384594dSTobias Grosser /// Get the set of all read accesses, tagged with the access id. 1485f384594dSTobias Grosser /// 1486f384594dSTobias Grosser /// @see getTaggedAccesses 1487f384594dSTobias Grosser isl_union_map *getTaggedReads() { 1488f384594dSTobias Grosser return getTaggedAccesses(MemoryAccess::READ); 1489f384594dSTobias Grosser } 1490f384594dSTobias Grosser 1491f384594dSTobias Grosser /// Get the set of all may (and must) accesses, tagged with the access id. 1492f384594dSTobias Grosser /// 1493f384594dSTobias Grosser /// @see getTaggedAccesses 1494f384594dSTobias Grosser isl_union_map *getTaggedMayWrites() { 1495f384594dSTobias Grosser return isl_union_map_union(getTaggedAccesses(MemoryAccess::MAY_WRITE), 1496f384594dSTobias Grosser getTaggedAccesses(MemoryAccess::MUST_WRITE)); 1497f384594dSTobias Grosser } 1498f384594dSTobias Grosser 1499f384594dSTobias Grosser /// Get the set of all must accesses, tagged with the access id. 1500f384594dSTobias Grosser /// 1501f384594dSTobias Grosser /// @see getTaggedAccesses 1502f384594dSTobias Grosser isl_union_map *getTaggedMustWrites() { 1503f384594dSTobias Grosser return getTaggedAccesses(MemoryAccess::MUST_WRITE); 1504f384594dSTobias Grosser } 1505f384594dSTobias Grosser 1506aef5196fSTobias Grosser /// Collect parameter and array names as isl_ids. 1507aef5196fSTobias Grosser /// 1508aef5196fSTobias Grosser /// To reason about the different parameters and arrays used, ppcg requires 1509aef5196fSTobias Grosser /// a list of all isl_ids in use. As PPCG traditionally performs 1510aef5196fSTobias Grosser /// source-to-source compilation each of these isl_ids is mapped to the 1511aef5196fSTobias Grosser /// expression that represents it. As we do not have a corresponding 1512aef5196fSTobias Grosser /// expression in Polly, we just map each id to a 'zero' expression to match 1513aef5196fSTobias Grosser /// the data format that ppcg expects. 1514aef5196fSTobias Grosser /// 1515aef5196fSTobias Grosser /// @returns Retun a map from collected ids to 'zero' ast expressions. 1516aef5196fSTobias Grosser __isl_give isl_id_to_ast_expr *getNames() { 1517aef5196fSTobias Grosser auto *Names = isl_id_to_ast_expr_alloc( 1518bd81a7eeSTobias Grosser S->getIslCtx(), 1519bd81a7eeSTobias Grosser S->getNumParams() + std::distance(S->array_begin(), S->array_end())); 1520aef5196fSTobias Grosser auto *Zero = isl_ast_expr_from_val(isl_val_zero(S->getIslCtx())); 1521aef5196fSTobias Grosser auto *Space = S->getParamSpace(); 1522aef5196fSTobias Grosser 1523aef5196fSTobias Grosser for (int I = 0, E = S->getNumParams(); I < E; ++I) { 1524aef5196fSTobias Grosser isl_id *Id = isl_space_get_dim_id(Space, isl_dim_param, I); 1525aef5196fSTobias Grosser Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 1526aef5196fSTobias Grosser } 1527aef5196fSTobias Grosser 1528aef5196fSTobias Grosser for (auto &Array : S->arrays()) { 1529d7754a12SRoman Gareev auto Id = Array->getBasePtrId(); 1530aef5196fSTobias Grosser Names = isl_id_to_ast_expr_set(Names, Id, isl_ast_expr_copy(Zero)); 1531aef5196fSTobias Grosser } 1532aef5196fSTobias Grosser 1533aef5196fSTobias Grosser isl_space_free(Space); 1534aef5196fSTobias Grosser isl_ast_expr_free(Zero); 1535aef5196fSTobias Grosser 1536aef5196fSTobias Grosser return Names; 1537aef5196fSTobias Grosser } 1538aef5196fSTobias Grosser 1539e938517eSTobias Grosser /// Create a new PPCG scop from the current scop. 1540e938517eSTobias Grosser /// 1541f384594dSTobias Grosser /// The PPCG scop is initialized with data from the current polly::Scop. From 1542f384594dSTobias Grosser /// this initial data, the data-dependences in the PPCG scop are initialized. 1543f384594dSTobias Grosser /// We do not use Polly's dependence analysis for now, to ensure we match 1544f384594dSTobias Grosser /// the PPCG default behaviour more closely. 1545e938517eSTobias Grosser /// 1546e938517eSTobias Grosser /// @returns A new ppcg scop. 1547e938517eSTobias Grosser ppcg_scop *createPPCGScop() { 1548e938517eSTobias Grosser auto PPCGScop = (ppcg_scop *)malloc(sizeof(ppcg_scop)); 1549e938517eSTobias Grosser 1550e938517eSTobias Grosser PPCGScop->options = createPPCGOptions(); 1551e938517eSTobias Grosser 1552e938517eSTobias Grosser PPCGScop->start = 0; 1553e938517eSTobias Grosser PPCGScop->end = 0; 1554e938517eSTobias Grosser 1555f384594dSTobias Grosser PPCGScop->context = S->getContext(); 1556f384594dSTobias Grosser PPCGScop->domain = S->getDomains(); 1557e938517eSTobias Grosser PPCGScop->call = nullptr; 1558f384594dSTobias Grosser PPCGScop->tagged_reads = getTaggedReads(); 1559f384594dSTobias Grosser PPCGScop->reads = S->getReads(); 1560e938517eSTobias Grosser PPCGScop->live_in = nullptr; 1561f384594dSTobias Grosser PPCGScop->tagged_may_writes = getTaggedMayWrites(); 1562f384594dSTobias Grosser PPCGScop->may_writes = S->getWrites(); 1563f384594dSTobias Grosser PPCGScop->tagged_must_writes = getTaggedMustWrites(); 1564f384594dSTobias Grosser PPCGScop->must_writes = S->getMustWrites(); 1565e938517eSTobias Grosser PPCGScop->live_out = nullptr; 1566f384594dSTobias Grosser PPCGScop->tagged_must_kills = isl_union_map_empty(S->getParamSpace()); 1567e938517eSTobias Grosser PPCGScop->tagger = nullptr; 1568e938517eSTobias Grosser 1569e938517eSTobias Grosser PPCGScop->independence = nullptr; 1570e938517eSTobias Grosser PPCGScop->dep_flow = nullptr; 1571e938517eSTobias Grosser PPCGScop->tagged_dep_flow = nullptr; 1572e938517eSTobias Grosser PPCGScop->dep_false = nullptr; 1573e938517eSTobias Grosser PPCGScop->dep_forced = nullptr; 1574e938517eSTobias Grosser PPCGScop->dep_order = nullptr; 1575e938517eSTobias Grosser PPCGScop->tagged_dep_order = nullptr; 1576e938517eSTobias Grosser 1577f384594dSTobias Grosser PPCGScop->schedule = S->getScheduleTree(); 1578aef5196fSTobias Grosser PPCGScop->names = getNames(); 1579e938517eSTobias Grosser 1580e938517eSTobias Grosser PPCGScop->pet = nullptr; 1581e938517eSTobias Grosser 1582f384594dSTobias Grosser compute_tagger(PPCGScop); 1583f384594dSTobias Grosser compute_dependences(PPCGScop); 1584f384594dSTobias Grosser 1585e938517eSTobias Grosser return PPCGScop; 1586e938517eSTobias Grosser } 1587e938517eSTobias Grosser 158860f63b49STobias Grosser /// Collect the array acesses in a statement. 158960f63b49STobias Grosser /// 159060f63b49STobias Grosser /// @param Stmt The statement for which to collect the accesses. 159160f63b49STobias Grosser /// 159260f63b49STobias Grosser /// @returns A list of array accesses. 159360f63b49STobias Grosser gpu_stmt_access *getStmtAccesses(ScopStmt &Stmt) { 159460f63b49STobias Grosser gpu_stmt_access *Accesses = nullptr; 159560f63b49STobias Grosser 159660f63b49STobias Grosser for (MemoryAccess *Acc : Stmt) { 159760f63b49STobias Grosser auto Access = isl_alloc_type(S->getIslCtx(), struct gpu_stmt_access); 159860f63b49STobias Grosser Access->read = Acc->isRead(); 159960f63b49STobias Grosser Access->write = Acc->isWrite(); 160060f63b49STobias Grosser Access->access = Acc->getAccessRelation(); 160160f63b49STobias Grosser isl_space *Space = isl_map_get_space(Access->access); 160260f63b49STobias Grosser Space = isl_space_range(Space); 160360f63b49STobias Grosser Space = isl_space_from_range(Space); 16046293ba69STobias Grosser Space = isl_space_set_tuple_id(Space, isl_dim_in, Acc->getId()); 160560f63b49STobias Grosser isl_map *Universe = isl_map_universe(Space); 160660f63b49STobias Grosser Access->tagged_access = 160760f63b49STobias Grosser isl_map_domain_product(Acc->getAccessRelation(), Universe); 1608b513b491STobias Grosser Access->exact_write = !Acc->isMayWrite(); 160960f63b49STobias Grosser Access->ref_id = Acc->getId(); 161060f63b49STobias Grosser Access->next = Accesses; 1611b513b491STobias Grosser Access->n_index = Acc->getScopArrayInfo()->getNumberOfDimensions(); 161260f63b49STobias Grosser Accesses = Access; 161360f63b49STobias Grosser } 161460f63b49STobias Grosser 161560f63b49STobias Grosser return Accesses; 161660f63b49STobias Grosser } 161760f63b49STobias Grosser 161869b46751STobias Grosser /// Collect the list of GPU statements. 161969b46751STobias Grosser /// 162069b46751STobias Grosser /// Each statement has an id, a pointer to the underlying data structure, 162169b46751STobias Grosser /// as well as a list with all memory accesses. 162269b46751STobias Grosser /// 162369b46751STobias Grosser /// TODO: Initialize the list of memory accesses. 162469b46751STobias Grosser /// 162569b46751STobias Grosser /// @returns A linked-list of statements. 162669b46751STobias Grosser gpu_stmt *getStatements() { 162769b46751STobias Grosser gpu_stmt *Stmts = isl_calloc_array(S->getIslCtx(), struct gpu_stmt, 162869b46751STobias Grosser std::distance(S->begin(), S->end())); 162969b46751STobias Grosser 163069b46751STobias Grosser int i = 0; 163169b46751STobias Grosser for (auto &Stmt : *S) { 163269b46751STobias Grosser gpu_stmt *GPUStmt = &Stmts[i]; 163369b46751STobias Grosser 163469b46751STobias Grosser GPUStmt->id = Stmt.getDomainId(); 163569b46751STobias Grosser 163669b46751STobias Grosser // We use the pet stmt pointer to keep track of the Polly statements. 163769b46751STobias Grosser GPUStmt->stmt = (pet_stmt *)&Stmt; 163860f63b49STobias Grosser GPUStmt->accesses = getStmtAccesses(Stmt); 163969b46751STobias Grosser i++; 164069b46751STobias Grosser } 164169b46751STobias Grosser 164269b46751STobias Grosser return Stmts; 164369b46751STobias Grosser } 164469b46751STobias Grosser 164560f63b49STobias Grosser /// Derive the extent of an array. 164660f63b49STobias Grosser /// 164760f63b49STobias Grosser /// The extent of an array is defined by the set of memory locations for 164860f63b49STobias Grosser /// which a memory access in the iteration domain exists. 164960f63b49STobias Grosser /// 165060f63b49STobias Grosser /// @param Array The array to derive the extent for. 165160f63b49STobias Grosser /// 165260f63b49STobias Grosser /// @returns An isl_set describing the extent of the array. 165360f63b49STobias Grosser __isl_give isl_set *getExtent(ScopArrayInfo *Array) { 165460f63b49STobias Grosser isl_union_map *Accesses = S->getAccesses(); 165560f63b49STobias Grosser Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains()); 165660f63b49STobias Grosser isl_union_set *AccessUSet = isl_union_map_range(Accesses); 165760f63b49STobias Grosser isl_set *AccessSet = 165860f63b49STobias Grosser isl_union_set_extract_set(AccessUSet, Array->getSpace()); 165960f63b49STobias Grosser isl_union_set_free(AccessUSet); 166060f63b49STobias Grosser 166160f63b49STobias Grosser return AccessSet; 166260f63b49STobias Grosser } 166360f63b49STobias Grosser 166460f63b49STobias Grosser /// Derive the bounds of an array. 166560f63b49STobias Grosser /// 166660f63b49STobias Grosser /// For the first dimension we derive the bound of the array from the extent 166760f63b49STobias Grosser /// of this dimension. For inner dimensions we obtain their size directly from 166860f63b49STobias Grosser /// ScopArrayInfo. 166960f63b49STobias Grosser /// 167060f63b49STobias Grosser /// @param PPCGArray The array to compute bounds for. 167160f63b49STobias Grosser /// @param Array The polly array from which to take the information. 167260f63b49STobias Grosser void setArrayBounds(gpu_array_info &PPCGArray, ScopArrayInfo *Array) { 167360f63b49STobias Grosser if (PPCGArray.n_index > 0) { 167460f63b49STobias Grosser isl_set *Dom = isl_set_copy(PPCGArray.extent); 167560f63b49STobias Grosser Dom = isl_set_project_out(Dom, isl_dim_set, 1, PPCGArray.n_index - 1); 167660f63b49STobias Grosser isl_pw_aff *Bound = isl_set_dim_max(isl_set_copy(Dom), 0); 167760f63b49STobias Grosser isl_set_free(Dom); 167860f63b49STobias Grosser Dom = isl_pw_aff_domain(isl_pw_aff_copy(Bound)); 167960f63b49STobias Grosser isl_local_space *LS = isl_local_space_from_space(isl_set_get_space(Dom)); 168060f63b49STobias Grosser isl_aff *One = isl_aff_zero_on_domain(LS); 168160f63b49STobias Grosser One = isl_aff_add_constant_si(One, 1); 168260f63b49STobias Grosser Bound = isl_pw_aff_add(Bound, isl_pw_aff_alloc(Dom, One)); 168360f63b49STobias Grosser Bound = isl_pw_aff_gist(Bound, S->getContext()); 168460f63b49STobias Grosser PPCGArray.bound[0] = Bound; 168560f63b49STobias Grosser } 168660f63b49STobias Grosser 168760f63b49STobias Grosser for (unsigned i = 1; i < PPCGArray.n_index; ++i) { 168860f63b49STobias Grosser isl_pw_aff *Bound = Array->getDimensionSizePw(i); 168960f63b49STobias Grosser auto LS = isl_pw_aff_get_domain_space(Bound); 169060f63b49STobias Grosser auto Aff = isl_multi_aff_zero(LS); 169160f63b49STobias Grosser Bound = isl_pw_aff_pullback_multi_aff(Bound, Aff); 169260f63b49STobias Grosser PPCGArray.bound[i] = Bound; 169360f63b49STobias Grosser } 169460f63b49STobias Grosser } 169560f63b49STobias Grosser 169660f63b49STobias Grosser /// Create the arrays for @p PPCGProg. 169760f63b49STobias Grosser /// 169860f63b49STobias Grosser /// @param PPCGProg The program to compute the arrays for. 169960f63b49STobias Grosser void createArrays(gpu_prog *PPCGProg) { 170060f63b49STobias Grosser int i = 0; 1701d7754a12SRoman Gareev for (auto &Array : S->arrays()) { 170260f63b49STobias Grosser std::string TypeName; 170360f63b49STobias Grosser raw_string_ostream OS(TypeName); 170460f63b49STobias Grosser 170560f63b49STobias Grosser OS << *Array->getElementType(); 170660f63b49STobias Grosser TypeName = OS.str(); 170760f63b49STobias Grosser 170860f63b49STobias Grosser gpu_array_info &PPCGArray = PPCGProg->array[i]; 170960f63b49STobias Grosser 171060f63b49STobias Grosser PPCGArray.space = Array->getSpace(); 171160f63b49STobias Grosser PPCGArray.type = strdup(TypeName.c_str()); 171260f63b49STobias Grosser PPCGArray.size = Array->getElementType()->getPrimitiveSizeInBits() / 8; 171360f63b49STobias Grosser PPCGArray.name = strdup(Array->getName().c_str()); 171460f63b49STobias Grosser PPCGArray.extent = nullptr; 171560f63b49STobias Grosser PPCGArray.n_index = Array->getNumberOfDimensions(); 171660f63b49STobias Grosser PPCGArray.bound = 171760f63b49STobias Grosser isl_alloc_array(S->getIslCtx(), isl_pw_aff *, PPCGArray.n_index); 171860f63b49STobias Grosser PPCGArray.extent = getExtent(Array); 171960f63b49STobias Grosser PPCGArray.n_ref = 0; 172060f63b49STobias Grosser PPCGArray.refs = nullptr; 172160f63b49STobias Grosser PPCGArray.accessed = true; 172260f63b49STobias Grosser PPCGArray.read_only_scalar = false; 172360f63b49STobias Grosser PPCGArray.has_compound_element = false; 172460f63b49STobias Grosser PPCGArray.local = false; 172560f63b49STobias Grosser PPCGArray.declare_local = false; 172660f63b49STobias Grosser PPCGArray.global = false; 172760f63b49STobias Grosser PPCGArray.linearize = false; 172860f63b49STobias Grosser PPCGArray.dep_order = nullptr; 172913c78e4dSTobias Grosser PPCGArray.user = Array; 173060f63b49STobias Grosser 173160f63b49STobias Grosser setArrayBounds(PPCGArray, Array); 17322d010dafSTobias Grosser i++; 1733b9fc860aSTobias Grosser 1734b9fc860aSTobias Grosser collect_references(PPCGProg, &PPCGArray); 173560f63b49STobias Grosser } 173660f63b49STobias Grosser } 173760f63b49STobias Grosser 173860f63b49STobias Grosser /// Create an identity map between the arrays in the scop. 173960f63b49STobias Grosser /// 174060f63b49STobias Grosser /// @returns An identity map between the arrays in the scop. 174160f63b49STobias Grosser isl_union_map *getArrayIdentity() { 174260f63b49STobias Grosser isl_union_map *Maps = isl_union_map_empty(S->getParamSpace()); 174360f63b49STobias Grosser 1744d7754a12SRoman Gareev for (auto &Array : S->arrays()) { 174560f63b49STobias Grosser isl_space *Space = Array->getSpace(); 174660f63b49STobias Grosser Space = isl_space_map_from_set(Space); 174760f63b49STobias Grosser isl_map *Identity = isl_map_identity(Space); 174860f63b49STobias Grosser Maps = isl_union_map_add_map(Maps, Identity); 174960f63b49STobias Grosser } 175060f63b49STobias Grosser 175160f63b49STobias Grosser return Maps; 175260f63b49STobias Grosser } 175360f63b49STobias Grosser 1754e938517eSTobias Grosser /// Create a default-initialized PPCG GPU program. 1755e938517eSTobias Grosser /// 1756e938517eSTobias Grosser /// @returns A new gpu grogram description. 1757e938517eSTobias Grosser gpu_prog *createPPCGProg(ppcg_scop *PPCGScop) { 1758e938517eSTobias Grosser 1759e938517eSTobias Grosser if (!PPCGScop) 1760e938517eSTobias Grosser return nullptr; 1761e938517eSTobias Grosser 1762e938517eSTobias Grosser auto PPCGProg = isl_calloc_type(S->getIslCtx(), struct gpu_prog); 1763e938517eSTobias Grosser 1764e938517eSTobias Grosser PPCGProg->ctx = S->getIslCtx(); 1765e938517eSTobias Grosser PPCGProg->scop = PPCGScop; 1766aef5196fSTobias Grosser PPCGProg->context = isl_set_copy(PPCGScop->context); 176760f63b49STobias Grosser PPCGProg->read = isl_union_map_copy(PPCGScop->reads); 176860f63b49STobias Grosser PPCGProg->may_write = isl_union_map_copy(PPCGScop->may_writes); 176960f63b49STobias Grosser PPCGProg->must_write = isl_union_map_copy(PPCGScop->must_writes); 177060f63b49STobias Grosser PPCGProg->tagged_must_kill = 177160f63b49STobias Grosser isl_union_map_copy(PPCGScop->tagged_must_kills); 177260f63b49STobias Grosser PPCGProg->to_inner = getArrayIdentity(); 177360f63b49STobias Grosser PPCGProg->to_outer = getArrayIdentity(); 177460f63b49STobias Grosser PPCGProg->may_persist = compute_may_persist(PPCGProg); 1775e938517eSTobias Grosser PPCGProg->any_to_outer = nullptr; 1776e938517eSTobias Grosser PPCGProg->array_order = nullptr; 177769b46751STobias Grosser PPCGProg->n_stmts = std::distance(S->begin(), S->end()); 177869b46751STobias Grosser PPCGProg->stmts = getStatements(); 177960f63b49STobias Grosser PPCGProg->n_array = std::distance(S->array_begin(), S->array_end()); 178060f63b49STobias Grosser PPCGProg->array = isl_calloc_array(S->getIslCtx(), struct gpu_array_info, 178160f63b49STobias Grosser PPCGProg->n_array); 178260f63b49STobias Grosser 178360f63b49STobias Grosser createArrays(PPCGProg); 1784e938517eSTobias Grosser 1785e938517eSTobias Grosser return PPCGProg; 1786e938517eSTobias Grosser } 1787e938517eSTobias Grosser 178869b46751STobias Grosser struct PrintGPUUserData { 178969b46751STobias Grosser struct cuda_info *CudaInfo; 179069b46751STobias Grosser struct gpu_prog *PPCGProg; 179169b46751STobias Grosser std::vector<ppcg_kernel *> Kernels; 179269b46751STobias Grosser }; 179369b46751STobias Grosser 179469b46751STobias Grosser /// Print a user statement node in the host code. 179569b46751STobias Grosser /// 179669b46751STobias Grosser /// We use ppcg's printing facilities to print the actual statement and 179769b46751STobias Grosser /// additionally build up a list of all kernels that are encountered in the 179869b46751STobias Grosser /// host ast. 179969b46751STobias Grosser /// 180069b46751STobias Grosser /// @param P The printer to print to 180169b46751STobias Grosser /// @param Options The printing options to use 180269b46751STobias Grosser /// @param Node The node to print 180369b46751STobias Grosser /// @param User A user pointer to carry additional data. This pointer is 180469b46751STobias Grosser /// expected to be of type PrintGPUUserData. 180569b46751STobias Grosser /// 180669b46751STobias Grosser /// @returns A printer to which the output has been printed. 180769b46751STobias Grosser static __isl_give isl_printer * 180869b46751STobias Grosser printHostUser(__isl_take isl_printer *P, 180969b46751STobias Grosser __isl_take isl_ast_print_options *Options, 181069b46751STobias Grosser __isl_take isl_ast_node *Node, void *User) { 181169b46751STobias Grosser auto Data = (struct PrintGPUUserData *)User; 181269b46751STobias Grosser auto Id = isl_ast_node_get_annotation(Node); 181369b46751STobias Grosser 181469b46751STobias Grosser if (Id) { 181520251734STobias Grosser bool IsUser = !strcmp(isl_id_get_name(Id), "user"); 181620251734STobias Grosser 181720251734STobias Grosser // If this is a user statement, format it ourselves as ppcg would 181820251734STobias Grosser // otherwise try to call pet functionality that is not available in 181920251734STobias Grosser // Polly. 182020251734STobias Grosser if (IsUser) { 182120251734STobias Grosser P = isl_printer_start_line(P); 182220251734STobias Grosser P = isl_printer_print_ast_node(P, Node); 182320251734STobias Grosser P = isl_printer_end_line(P); 182420251734STobias Grosser isl_id_free(Id); 182520251734STobias Grosser isl_ast_print_options_free(Options); 182620251734STobias Grosser return P; 182720251734STobias Grosser } 182820251734STobias Grosser 182969b46751STobias Grosser auto Kernel = (struct ppcg_kernel *)isl_id_get_user(Id); 183069b46751STobias Grosser isl_id_free(Id); 183169b46751STobias Grosser Data->Kernels.push_back(Kernel); 183269b46751STobias Grosser } 183369b46751STobias Grosser 183469b46751STobias Grosser return print_host_user(P, Options, Node, User); 183569b46751STobias Grosser } 183669b46751STobias Grosser 183769b46751STobias Grosser /// Print C code corresponding to the control flow in @p Kernel. 183869b46751STobias Grosser /// 183969b46751STobias Grosser /// @param Kernel The kernel to print 184069b46751STobias Grosser void printKernel(ppcg_kernel *Kernel) { 184169b46751STobias Grosser auto *P = isl_printer_to_str(S->getIslCtx()); 184269b46751STobias Grosser P = isl_printer_set_output_format(P, ISL_FORMAT_C); 184369b46751STobias Grosser auto *Options = isl_ast_print_options_alloc(S->getIslCtx()); 184469b46751STobias Grosser P = isl_ast_node_print(Kernel->tree, P, Options); 184569b46751STobias Grosser char *String = isl_printer_get_str(P); 184669b46751STobias Grosser printf("%s\n", String); 184769b46751STobias Grosser free(String); 184869b46751STobias Grosser isl_printer_free(P); 184969b46751STobias Grosser } 185069b46751STobias Grosser 185169b46751STobias Grosser /// Print C code corresponding to the GPU code described by @p Tree. 185269b46751STobias Grosser /// 185369b46751STobias Grosser /// @param Tree An AST describing GPU code 185469b46751STobias Grosser /// @param PPCGProg The PPCG program from which @Tree has been constructed. 185569b46751STobias Grosser void printGPUTree(isl_ast_node *Tree, gpu_prog *PPCGProg) { 185669b46751STobias Grosser auto *P = isl_printer_to_str(S->getIslCtx()); 185769b46751STobias Grosser P = isl_printer_set_output_format(P, ISL_FORMAT_C); 185869b46751STobias Grosser 185969b46751STobias Grosser PrintGPUUserData Data; 186069b46751STobias Grosser Data.PPCGProg = PPCGProg; 186169b46751STobias Grosser 186269b46751STobias Grosser auto *Options = isl_ast_print_options_alloc(S->getIslCtx()); 186369b46751STobias Grosser Options = 186469b46751STobias Grosser isl_ast_print_options_set_print_user(Options, printHostUser, &Data); 186569b46751STobias Grosser P = isl_ast_node_print(Tree, P, Options); 186669b46751STobias Grosser char *String = isl_printer_get_str(P); 186769b46751STobias Grosser printf("# host\n"); 186869b46751STobias Grosser printf("%s\n", String); 186969b46751STobias Grosser free(String); 187069b46751STobias Grosser isl_printer_free(P); 187169b46751STobias Grosser 187269b46751STobias Grosser for (auto Kernel : Data.Kernels) { 187369b46751STobias Grosser printf("# kernel%d\n", Kernel->id); 187469b46751STobias Grosser printKernel(Kernel); 187569b46751STobias Grosser } 187669b46751STobias Grosser } 187769b46751STobias Grosser 1878f384594dSTobias Grosser // Generate a GPU program using PPCG. 1879f384594dSTobias Grosser // 1880f384594dSTobias Grosser // GPU mapping consists of multiple steps: 1881f384594dSTobias Grosser // 1882f384594dSTobias Grosser // 1) Compute new schedule for the program. 1883f384594dSTobias Grosser // 2) Map schedule to GPU (TODO) 1884f384594dSTobias Grosser // 3) Generate code for new schedule (TODO) 1885f384594dSTobias Grosser // 1886f384594dSTobias Grosser // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer 1887f384594dSTobias Grosser // is mostly CPU specific. Instead, we use PPCG's GPU code generation 1888f384594dSTobias Grosser // strategy directly from this pass. 1889f384594dSTobias Grosser gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { 1890f384594dSTobias Grosser 1891f384594dSTobias Grosser auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); 1892f384594dSTobias Grosser 1893f384594dSTobias Grosser PPCGGen->ctx = S->getIslCtx(); 1894f384594dSTobias Grosser PPCGGen->options = PPCGScop->options; 1895f384594dSTobias Grosser PPCGGen->print = nullptr; 1896f384594dSTobias Grosser PPCGGen->print_user = nullptr; 189760c60025STobias Grosser PPCGGen->build_ast_expr = &pollyBuildAstExprForStmt; 1898f384594dSTobias Grosser PPCGGen->prog = PPCGProg; 1899f384594dSTobias Grosser PPCGGen->tree = nullptr; 1900f384594dSTobias Grosser PPCGGen->types.n = 0; 1901f384594dSTobias Grosser PPCGGen->types.name = nullptr; 1902f384594dSTobias Grosser PPCGGen->sizes = nullptr; 1903f384594dSTobias Grosser PPCGGen->used_sizes = nullptr; 1904f384594dSTobias Grosser PPCGGen->kernel_id = 0; 1905f384594dSTobias Grosser 1906f384594dSTobias Grosser // Set scheduling strategy to same strategy PPCG is using. 1907f384594dSTobias Grosser isl_options_set_schedule_outer_coincidence(PPCGGen->ctx, true); 1908f384594dSTobias Grosser isl_options_set_schedule_maximize_band_depth(PPCGGen->ctx, true); 19092341fe9eSTobias Grosser isl_options_set_schedule_whole_component(PPCGGen->ctx, false); 1910f384594dSTobias Grosser 1911f384594dSTobias Grosser isl_schedule *Schedule = get_schedule(PPCGGen); 1912f384594dSTobias Grosser 1913aef5196fSTobias Grosser int has_permutable = has_any_permutable_node(Schedule); 1914aef5196fSTobias Grosser 191569b46751STobias Grosser if (!has_permutable || has_permutable < 0) { 1916aef5196fSTobias Grosser Schedule = isl_schedule_free(Schedule); 191769b46751STobias Grosser } else { 1918aef5196fSTobias Grosser Schedule = map_to_device(PPCGGen, Schedule); 191969b46751STobias Grosser PPCGGen->tree = generate_code(PPCGGen, isl_schedule_copy(Schedule)); 192069b46751STobias Grosser } 1921aef5196fSTobias Grosser 1922f384594dSTobias Grosser if (DumpSchedule) { 1923f384594dSTobias Grosser isl_printer *P = isl_printer_to_str(S->getIslCtx()); 1924f384594dSTobias Grosser P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 1925f384594dSTobias Grosser P = isl_printer_print_str(P, "Schedule\n"); 1926f384594dSTobias Grosser P = isl_printer_print_str(P, "========\n"); 1927f384594dSTobias Grosser if (Schedule) 1928f384594dSTobias Grosser P = isl_printer_print_schedule(P, Schedule); 1929f384594dSTobias Grosser else 1930f384594dSTobias Grosser P = isl_printer_print_str(P, "No schedule found\n"); 1931f384594dSTobias Grosser 1932f384594dSTobias Grosser printf("%s\n", isl_printer_get_str(P)); 1933f384594dSTobias Grosser isl_printer_free(P); 1934f384594dSTobias Grosser } 1935f384594dSTobias Grosser 193669b46751STobias Grosser if (DumpCode) { 193769b46751STobias Grosser printf("Code\n"); 193869b46751STobias Grosser printf("====\n"); 193969b46751STobias Grosser if (PPCGGen->tree) 194069b46751STobias Grosser printGPUTree(PPCGGen->tree, PPCGProg); 194169b46751STobias Grosser else 194269b46751STobias Grosser printf("No code generated\n"); 194369b46751STobias Grosser } 194469b46751STobias Grosser 1945f384594dSTobias Grosser isl_schedule_free(Schedule); 1946f384594dSTobias Grosser 1947f384594dSTobias Grosser return PPCGGen; 1948f384594dSTobias Grosser } 1949f384594dSTobias Grosser 1950f384594dSTobias Grosser /// Free gpu_gen structure. 1951f384594dSTobias Grosser /// 1952f384594dSTobias Grosser /// @param PPCGGen The ppcg_gen object to free. 1953f384594dSTobias Grosser void freePPCGGen(gpu_gen *PPCGGen) { 1954f384594dSTobias Grosser isl_ast_node_free(PPCGGen->tree); 1955f384594dSTobias Grosser isl_union_map_free(PPCGGen->sizes); 1956f384594dSTobias Grosser isl_union_map_free(PPCGGen->used_sizes); 1957f384594dSTobias Grosser free(PPCGGen); 1958f384594dSTobias Grosser } 1959f384594dSTobias Grosser 1960b307ed4dSTobias Grosser /// Free the options in the ppcg scop structure. 1961b307ed4dSTobias Grosser /// 1962b307ed4dSTobias Grosser /// ppcg is not freeing these options for us. To avoid leaks we do this 1963b307ed4dSTobias Grosser /// ourselves. 1964b307ed4dSTobias Grosser /// 1965b307ed4dSTobias Grosser /// @param PPCGScop The scop referencing the options to free. 1966b307ed4dSTobias Grosser void freeOptions(ppcg_scop *PPCGScop) { 1967b307ed4dSTobias Grosser free(PPCGScop->options->debug); 1968b307ed4dSTobias Grosser PPCGScop->options->debug = nullptr; 1969b307ed4dSTobias Grosser free(PPCGScop->options); 1970b307ed4dSTobias Grosser PPCGScop->options = nullptr; 1971b307ed4dSTobias Grosser } 1972b307ed4dSTobias Grosser 197338fc0aedSTobias Grosser /// Generate code for a given GPU AST described by @p Root. 197438fc0aedSTobias Grosser /// 197532837fe3STobias Grosser /// @param Root An isl_ast_node pointing to the root of the GPU AST. 197632837fe3STobias Grosser /// @param Prog The GPU Program to generate code for. 197732837fe3STobias Grosser void generateCode(__isl_take isl_ast_node *Root, gpu_prog *Prog) { 197838fc0aedSTobias Grosser ScopAnnotator Annotator; 197938fc0aedSTobias Grosser Annotator.buildAliasScopes(*S); 198038fc0aedSTobias Grosser 198138fc0aedSTobias Grosser Region *R = &S->getRegion(); 198238fc0aedSTobias Grosser 198338fc0aedSTobias Grosser simplifyRegion(R, DT, LI, RI); 198438fc0aedSTobias Grosser 198538fc0aedSTobias Grosser BasicBlock *EnteringBB = R->getEnteringBlock(); 198638fc0aedSTobias Grosser 198738fc0aedSTobias Grosser PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); 198838fc0aedSTobias Grosser 198932837fe3STobias Grosser GPUNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, *S, 199032837fe3STobias Grosser Prog); 199138fc0aedSTobias Grosser 199238fc0aedSTobias Grosser // Only build the run-time condition and parameters _after_ having 199338fc0aedSTobias Grosser // introduced the conditional branch. This is important as the conditional 199438fc0aedSTobias Grosser // branch will guard the original scop from new induction variables that 199538fc0aedSTobias Grosser // the SCEVExpander may introduce while code generating the parameters and 199638fc0aedSTobias Grosser // which may introduce scalar dependences that prevent us from correctly 199738fc0aedSTobias Grosser // code generating this scop. 199838fc0aedSTobias Grosser BasicBlock *StartBlock = 199938fc0aedSTobias Grosser executeScopConditionally(*S, this, Builder.getTrue()); 200038fc0aedSTobias Grosser 200138fc0aedSTobias Grosser // TODO: Handle LICM 200238fc0aedSTobias Grosser // TODO: Verify run-time checks 200338fc0aedSTobias Grosser auto SplitBlock = StartBlock->getSinglePredecessor(); 200438fc0aedSTobias Grosser Builder.SetInsertPoint(SplitBlock->getTerminator()); 200538fc0aedSTobias Grosser NodeBuilder.addParameters(S->getContext()); 200638fc0aedSTobias Grosser Builder.SetInsertPoint(&*StartBlock->begin()); 2007fa7b0802STobias Grosser 2008fa7b0802STobias Grosser NodeBuilder.initializeAfterRTH(); 200938fc0aedSTobias Grosser NodeBuilder.create(Root); 20108ed5e599STobias Grosser NodeBuilder.finalize(); 201138fc0aedSTobias Grosser } 201238fc0aedSTobias Grosser 2013e938517eSTobias Grosser bool runOnScop(Scop &CurrentScop) override { 2014e938517eSTobias Grosser S = &CurrentScop; 201538fc0aedSTobias Grosser LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 201638fc0aedSTobias Grosser DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 201738fc0aedSTobias Grosser SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 201838fc0aedSTobias Grosser DL = &S->getRegion().getEntry()->getParent()->getParent()->getDataLayout(); 201938fc0aedSTobias Grosser RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 2020e938517eSTobias Grosser 20212d58a64eSTobias Grosser // We currently do not support scops with invariant loads. 20222d58a64eSTobias Grosser if (S->hasInvariantAccesses()) 20232d58a64eSTobias Grosser return false; 20242d58a64eSTobias Grosser 2025e938517eSTobias Grosser auto PPCGScop = createPPCGScop(); 2026e938517eSTobias Grosser auto PPCGProg = createPPCGProg(PPCGScop); 2027f384594dSTobias Grosser auto PPCGGen = generateGPU(PPCGScop, PPCGProg); 202838fc0aedSTobias Grosser 202938fc0aedSTobias Grosser if (PPCGGen->tree) 203032837fe3STobias Grosser generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg); 203138fc0aedSTobias Grosser 2032b307ed4dSTobias Grosser freeOptions(PPCGScop); 2033f384594dSTobias Grosser freePPCGGen(PPCGGen); 2034e938517eSTobias Grosser gpu_prog_free(PPCGProg); 2035e938517eSTobias Grosser ppcg_scop_free(PPCGScop); 2036e938517eSTobias Grosser 2037e938517eSTobias Grosser return true; 2038e938517eSTobias Grosser } 20399dfe4e7cSTobias Grosser 20409dfe4e7cSTobias Grosser void printScop(raw_ostream &, Scop &) const override {} 20419dfe4e7cSTobias Grosser 20429dfe4e7cSTobias Grosser void getAnalysisUsage(AnalysisUsage &AU) const override { 20439dfe4e7cSTobias Grosser AU.addRequired<DominatorTreeWrapperPass>(); 20449dfe4e7cSTobias Grosser AU.addRequired<RegionInfoPass>(); 20459dfe4e7cSTobias Grosser AU.addRequired<ScalarEvolutionWrapperPass>(); 20469dfe4e7cSTobias Grosser AU.addRequired<ScopDetection>(); 20479dfe4e7cSTobias Grosser AU.addRequired<ScopInfoRegionPass>(); 20489dfe4e7cSTobias Grosser AU.addRequired<LoopInfoWrapperPass>(); 20499dfe4e7cSTobias Grosser 20509dfe4e7cSTobias Grosser AU.addPreserved<AAResultsWrapperPass>(); 20519dfe4e7cSTobias Grosser AU.addPreserved<BasicAAWrapperPass>(); 20529dfe4e7cSTobias Grosser AU.addPreserved<LoopInfoWrapperPass>(); 20539dfe4e7cSTobias Grosser AU.addPreserved<DominatorTreeWrapperPass>(); 20549dfe4e7cSTobias Grosser AU.addPreserved<GlobalsAAWrapperPass>(); 20559dfe4e7cSTobias Grosser AU.addPreserved<PostDominatorTreeWrapperPass>(); 20569dfe4e7cSTobias Grosser AU.addPreserved<ScopDetection>(); 20579dfe4e7cSTobias Grosser AU.addPreserved<ScalarEvolutionWrapperPass>(); 20589dfe4e7cSTobias Grosser AU.addPreserved<SCEVAAWrapperPass>(); 20599dfe4e7cSTobias Grosser 20609dfe4e7cSTobias Grosser // FIXME: We do not yet add regions for the newly generated code to the 20619dfe4e7cSTobias Grosser // region tree. 20629dfe4e7cSTobias Grosser AU.addPreserved<RegionInfoPass>(); 20639dfe4e7cSTobias Grosser AU.addPreserved<ScopInfoRegionPass>(); 20649dfe4e7cSTobias Grosser } 20659dfe4e7cSTobias Grosser }; 20669dfe4e7cSTobias Grosser } 20679dfe4e7cSTobias Grosser 20689dfe4e7cSTobias Grosser char PPCGCodeGeneration::ID = 1; 20699dfe4e7cSTobias Grosser 20709dfe4e7cSTobias Grosser Pass *polly::createPPCGCodeGenerationPass() { return new PPCGCodeGeneration(); } 20719dfe4e7cSTobias Grosser 20729dfe4e7cSTobias Grosser INITIALIZE_PASS_BEGIN(PPCGCodeGeneration, "polly-codegen-ppcg", 20739dfe4e7cSTobias Grosser "Polly - Apply PPCG translation to SCOP", false, false) 20749dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 20759dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 20769dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 20779dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 20789dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 20799dfe4e7cSTobias Grosser INITIALIZE_PASS_DEPENDENCY(ScopDetection); 20809dfe4e7cSTobias Grosser INITIALIZE_PASS_END(PPCGCodeGeneration, "polly-codegen-ppcg", 20819dfe4e7cSTobias Grosser "Polly - Apply PPCG translation to SCOP", false, false) 2082