13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// 23931948bSMatt Arsenault // 33931948bSMatt Arsenault // The LLVM Compiler Infrastructure 43931948bSMatt Arsenault // 53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source 63931948bSMatt Arsenault // License. See LICENSE.TXT for details. 73931948bSMatt Arsenault // 83931948bSMatt Arsenault //===----------------------------------------------------------------------===// 93931948bSMatt Arsenault // 103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics 113931948bSMatt Arsenault /// which will impact calling convention lowering. 123931948bSMatt Arsenault // 133931948bSMatt Arsenault //===----------------------------------------------------------------------===// 143931948bSMatt Arsenault 153931948bSMatt Arsenault #include "AMDGPU.h" 16*3b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h" 173931948bSMatt Arsenault #include "llvm/IR/Instructions.h" 183931948bSMatt Arsenault #include "llvm/IR/Module.h" 193931948bSMatt Arsenault 203931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 213931948bSMatt Arsenault 223931948bSMatt Arsenault using namespace llvm; 233931948bSMatt Arsenault 243931948bSMatt Arsenault namespace { 253931948bSMatt Arsenault 263931948bSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public ModulePass { 273931948bSMatt Arsenault private: 2899c14524SMatt Arsenault static bool hasAddrSpaceCast(const Function &F); 2999c14524SMatt Arsenault 303931948bSMatt Arsenault void addAttrToCallers(Function *Intrin, StringRef AttrName); 313931948bSMatt Arsenault bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); 323931948bSMatt Arsenault 333931948bSMatt Arsenault public: 343931948bSMatt Arsenault static char ID; 353931948bSMatt Arsenault 363931948bSMatt Arsenault AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { } 373931948bSMatt Arsenault bool runOnModule(Module &M) override; 383931948bSMatt Arsenault const char *getPassName() const override { 393931948bSMatt Arsenault return "AMDGPU Annotate Kernel Features"; 403931948bSMatt Arsenault } 413931948bSMatt Arsenault 423931948bSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 433931948bSMatt Arsenault AU.setPreservesAll(); 443931948bSMatt Arsenault ModulePass::getAnalysisUsage(AU); 453931948bSMatt Arsenault } 46*3b2e2a59SMatt Arsenault 47*3b2e2a59SMatt Arsenault static bool visitConstantExpr(const ConstantExpr *CE); 48*3b2e2a59SMatt Arsenault static bool visitConstantExprsRecursively( 49*3b2e2a59SMatt Arsenault const Constant *EntryC, 50*3b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> &ConstantExprVisited); 513931948bSMatt Arsenault }; 523931948bSMatt Arsenault 533931948bSMatt Arsenault } 543931948bSMatt Arsenault 553931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0; 563931948bSMatt Arsenault 573931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 583931948bSMatt Arsenault 5999c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 603931948bSMatt Arsenault "Add AMDGPU function attributes", false, false) 613931948bSMatt Arsenault 6299c14524SMatt Arsenault 6399c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it. 64*3b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(unsigned SrcAS) { 6599c14524SMatt Arsenault return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 6699c14524SMatt Arsenault } 6799c14524SMatt Arsenault 68*3b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { 69*3b2e2a59SMatt Arsenault return castRequiresQueuePtr(ASC->getSrcAddressSpace()); 70*3b2e2a59SMatt Arsenault } 71*3b2e2a59SMatt Arsenault 72*3b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { 73*3b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 74*3b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 75*3b2e2a59SMatt Arsenault return castRequiresQueuePtr(SrcAS); 76*3b2e2a59SMatt Arsenault } 77*3b2e2a59SMatt Arsenault 78*3b2e2a59SMatt Arsenault return false; 79*3b2e2a59SMatt Arsenault } 80*3b2e2a59SMatt Arsenault 81*3b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 82*3b2e2a59SMatt Arsenault const Constant *EntryC, 83*3b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { 84*3b2e2a59SMatt Arsenault 85*3b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 86*3b2e2a59SMatt Arsenault return false; 87*3b2e2a59SMatt Arsenault 88*3b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 89*3b2e2a59SMatt Arsenault Stack.push_back(EntryC); 90*3b2e2a59SMatt Arsenault 91*3b2e2a59SMatt Arsenault while (!Stack.empty()) { 92*3b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 93*3b2e2a59SMatt Arsenault 94*3b2e2a59SMatt Arsenault // Check this constant expression. 95*3b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 96*3b2e2a59SMatt Arsenault if (visitConstantExpr(CE)) 97*3b2e2a59SMatt Arsenault return true; 98*3b2e2a59SMatt Arsenault } 99*3b2e2a59SMatt Arsenault 100*3b2e2a59SMatt Arsenault // Visit all sub-expressions. 101*3b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 102*3b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 103*3b2e2a59SMatt Arsenault if (!OpC) 104*3b2e2a59SMatt Arsenault continue; 105*3b2e2a59SMatt Arsenault 106*3b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 107*3b2e2a59SMatt Arsenault continue; 108*3b2e2a59SMatt Arsenault 109*3b2e2a59SMatt Arsenault Stack.push_back(OpC); 110*3b2e2a59SMatt Arsenault } 111*3b2e2a59SMatt Arsenault } 112*3b2e2a59SMatt Arsenault 113*3b2e2a59SMatt Arsenault return false; 114*3b2e2a59SMatt Arsenault } 115*3b2e2a59SMatt Arsenault 11699c14524SMatt Arsenault // Return true if an addrspacecast is used that requires the queue ptr. 11799c14524SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { 118*3b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 119*3b2e2a59SMatt Arsenault 12099c14524SMatt Arsenault for (const BasicBlock &BB : F) { 12199c14524SMatt Arsenault for (const Instruction &I : BB) { 12299c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 12399c14524SMatt Arsenault if (castRequiresQueuePtr(ASC)) 12499c14524SMatt Arsenault return true; 12599c14524SMatt Arsenault } 126*3b2e2a59SMatt Arsenault 127*3b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 128*3b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 129*3b2e2a59SMatt Arsenault if (!OpC) 130*3b2e2a59SMatt Arsenault continue; 131*3b2e2a59SMatt Arsenault 132*3b2e2a59SMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) 133*3b2e2a59SMatt Arsenault return true; 134*3b2e2a59SMatt Arsenault } 13599c14524SMatt Arsenault } 13699c14524SMatt Arsenault } 13799c14524SMatt Arsenault 13899c14524SMatt Arsenault return false; 13999c14524SMatt Arsenault } 1403931948bSMatt Arsenault 1413931948bSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, 1423931948bSMatt Arsenault StringRef AttrName) { 1433931948bSMatt Arsenault SmallPtrSet<Function *, 4> SeenFuncs; 1443931948bSMatt Arsenault 1453931948bSMatt Arsenault for (User *U : Intrin->users()) { 1463931948bSMatt Arsenault // CallInst is the only valid user for an intrinsic. 1473931948bSMatt Arsenault CallInst *CI = cast<CallInst>(U); 1483931948bSMatt Arsenault 1493931948bSMatt Arsenault Function *CallingFunction = CI->getParent()->getParent(); 1503931948bSMatt Arsenault if (SeenFuncs.insert(CallingFunction).second) 1513931948bSMatt Arsenault CallingFunction->addFnAttr(AttrName); 1523931948bSMatt Arsenault } 1533931948bSMatt Arsenault } 1543931948bSMatt Arsenault 1553931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 1563931948bSMatt Arsenault Module &M, 1573931948bSMatt Arsenault ArrayRef<StringRef[2]> IntrinsicToAttr) { 1583931948bSMatt Arsenault bool Changed = false; 1593931948bSMatt Arsenault 1603931948bSMatt Arsenault for (const StringRef *Arr : IntrinsicToAttr) { 1613931948bSMatt Arsenault if (Function *Fn = M.getFunction(Arr[0])) { 1623931948bSMatt Arsenault addAttrToCallers(Fn, Arr[1]); 1633931948bSMatt Arsenault Changed = true; 1643931948bSMatt Arsenault } 1653931948bSMatt Arsenault } 1663931948bSMatt Arsenault 1673931948bSMatt Arsenault return Changed; 1683931948bSMatt Arsenault } 1693931948bSMatt Arsenault 1703931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { 1713931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 1723931948bSMatt Arsenault 1733931948bSMatt Arsenault static const StringRef IntrinsicToAttr[][2] = { 1743931948bSMatt Arsenault // .x omitted 17543976df0SMatt Arsenault { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, 17643976df0SMatt Arsenault { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, 17743976df0SMatt Arsenault 17843976df0SMatt Arsenault { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, 17943976df0SMatt Arsenault { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, 18043976df0SMatt Arsenault 1813931948bSMatt Arsenault { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, 1823931948bSMatt Arsenault { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, 1833931948bSMatt Arsenault 1843931948bSMatt Arsenault // .x omitted 1853931948bSMatt Arsenault { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, 1863931948bSMatt Arsenault { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } 1873931948bSMatt Arsenault }; 1883931948bSMatt Arsenault 1893931948bSMatt Arsenault static const StringRef HSAIntrinsicToAttr[][2] = { 19048ab526fSMatt Arsenault { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, 19148ab526fSMatt Arsenault { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" } 1923931948bSMatt Arsenault }; 1933931948bSMatt Arsenault 194d0799df7SMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 195d0799df7SMatt Arsenault // size is 1 for y/z. 196d0799df7SMatt Arsenault 1973931948bSMatt Arsenault // TODO: Intrinsics that require queue ptr. 1983931948bSMatt Arsenault 1993931948bSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are 2003931948bSMatt Arsenault // always initialized. 2013931948bSMatt Arsenault 2023931948bSMatt Arsenault bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); 20399c14524SMatt Arsenault if (TT.getOS() == Triple::AMDHSA) { 2043931948bSMatt Arsenault Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); 2053931948bSMatt Arsenault 20699c14524SMatt Arsenault for (Function &F : M) { 20799c14524SMatt Arsenault if (F.hasFnAttribute("amdgpu-queue-ptr")) 20899c14524SMatt Arsenault continue; 20999c14524SMatt Arsenault 21099c14524SMatt Arsenault if (hasAddrSpaceCast(F)) 21199c14524SMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 21299c14524SMatt Arsenault } 21399c14524SMatt Arsenault } 21499c14524SMatt Arsenault 2153931948bSMatt Arsenault return Changed; 2163931948bSMatt Arsenault } 2173931948bSMatt Arsenault 2183931948bSMatt Arsenault ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 2193931948bSMatt Arsenault return new AMDGPUAnnotateKernelFeatures(); 2203931948bSMatt Arsenault } 221