13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// 23931948bSMatt Arsenault // 33931948bSMatt Arsenault // The LLVM Compiler Infrastructure 43931948bSMatt Arsenault // 53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source 63931948bSMatt Arsenault // License. See LICENSE.TXT for details. 73931948bSMatt Arsenault // 83931948bSMatt Arsenault //===----------------------------------------------------------------------===// 93931948bSMatt Arsenault // 103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics 113931948bSMatt Arsenault /// which will impact calling convention lowering. 123931948bSMatt Arsenault // 133931948bSMatt Arsenault //===----------------------------------------------------------------------===// 143931948bSMatt Arsenault 153931948bSMatt Arsenault #include "AMDGPU.h" 163b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h" 173931948bSMatt Arsenault #include "llvm/IR/Instructions.h" 183931948bSMatt Arsenault #include "llvm/IR/Module.h" 193931948bSMatt Arsenault 203931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 213931948bSMatt Arsenault 223931948bSMatt Arsenault using namespace llvm; 233931948bSMatt Arsenault 243931948bSMatt Arsenault namespace { 253931948bSMatt Arsenault 263931948bSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public ModulePass { 273931948bSMatt Arsenault private: 2899c14524SMatt Arsenault static bool hasAddrSpaceCast(const Function &F); 2999c14524SMatt Arsenault 303931948bSMatt Arsenault void addAttrToCallers(Function *Intrin, StringRef AttrName); 313931948bSMatt Arsenault bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); 323931948bSMatt Arsenault 333931948bSMatt Arsenault public: 343931948bSMatt Arsenault static char ID; 353931948bSMatt Arsenault 363931948bSMatt Arsenault AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { } 373931948bSMatt Arsenault bool runOnModule(Module &M) override; 383931948bSMatt Arsenault const char *getPassName() const override { 393931948bSMatt Arsenault return "AMDGPU Annotate Kernel Features"; 403931948bSMatt Arsenault } 413931948bSMatt Arsenault 423931948bSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 433931948bSMatt Arsenault AU.setPreservesAll(); 443931948bSMatt Arsenault ModulePass::getAnalysisUsage(AU); 453931948bSMatt Arsenault } 463b2e2a59SMatt Arsenault 473b2e2a59SMatt Arsenault static bool visitConstantExpr(const ConstantExpr *CE); 483b2e2a59SMatt Arsenault static bool visitConstantExprsRecursively( 493b2e2a59SMatt Arsenault const Constant *EntryC, 503b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> &ConstantExprVisited); 513931948bSMatt Arsenault }; 523931948bSMatt Arsenault 533931948bSMatt Arsenault } 543931948bSMatt Arsenault 553931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0; 563931948bSMatt Arsenault 573931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 583931948bSMatt Arsenault 5999c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 603931948bSMatt Arsenault "Add AMDGPU function attributes", false, false) 613931948bSMatt Arsenault 6299c14524SMatt Arsenault 6399c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it. 643b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(unsigned SrcAS) { 6599c14524SMatt Arsenault return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 6699c14524SMatt Arsenault } 6799c14524SMatt Arsenault 683b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { 693b2e2a59SMatt Arsenault return castRequiresQueuePtr(ASC->getSrcAddressSpace()); 703b2e2a59SMatt Arsenault } 713b2e2a59SMatt Arsenault 723b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { 733b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 743b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 753b2e2a59SMatt Arsenault return castRequiresQueuePtr(SrcAS); 763b2e2a59SMatt Arsenault } 773b2e2a59SMatt Arsenault 783b2e2a59SMatt Arsenault return false; 793b2e2a59SMatt Arsenault } 803b2e2a59SMatt Arsenault 813b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 823b2e2a59SMatt Arsenault const Constant *EntryC, 833b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { 843b2e2a59SMatt Arsenault 853b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 863b2e2a59SMatt Arsenault return false; 873b2e2a59SMatt Arsenault 883b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 893b2e2a59SMatt Arsenault Stack.push_back(EntryC); 903b2e2a59SMatt Arsenault 913b2e2a59SMatt Arsenault while (!Stack.empty()) { 923b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 933b2e2a59SMatt Arsenault 943b2e2a59SMatt Arsenault // Check this constant expression. 953b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 963b2e2a59SMatt Arsenault if (visitConstantExpr(CE)) 973b2e2a59SMatt Arsenault return true; 983b2e2a59SMatt Arsenault } 993b2e2a59SMatt Arsenault 1003b2e2a59SMatt Arsenault // Visit all sub-expressions. 1013b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 1023b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1033b2e2a59SMatt Arsenault if (!OpC) 1043b2e2a59SMatt Arsenault continue; 1053b2e2a59SMatt Arsenault 1063b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 1073b2e2a59SMatt Arsenault continue; 1083b2e2a59SMatt Arsenault 1093b2e2a59SMatt Arsenault Stack.push_back(OpC); 1103b2e2a59SMatt Arsenault } 1113b2e2a59SMatt Arsenault } 1123b2e2a59SMatt Arsenault 1133b2e2a59SMatt Arsenault return false; 1143b2e2a59SMatt Arsenault } 1153b2e2a59SMatt Arsenault 11699c14524SMatt Arsenault // Return true if an addrspacecast is used that requires the queue ptr. 11799c14524SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { 1183b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 1193b2e2a59SMatt Arsenault 12099c14524SMatt Arsenault for (const BasicBlock &BB : F) { 12199c14524SMatt Arsenault for (const Instruction &I : BB) { 12299c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 12399c14524SMatt Arsenault if (castRequiresQueuePtr(ASC)) 12499c14524SMatt Arsenault return true; 12599c14524SMatt Arsenault } 1263b2e2a59SMatt Arsenault 1273b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 1283b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1293b2e2a59SMatt Arsenault if (!OpC) 1303b2e2a59SMatt Arsenault continue; 1313b2e2a59SMatt Arsenault 1323b2e2a59SMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) 1333b2e2a59SMatt Arsenault return true; 1343b2e2a59SMatt Arsenault } 13599c14524SMatt Arsenault } 13699c14524SMatt Arsenault } 13799c14524SMatt Arsenault 13899c14524SMatt Arsenault return false; 13999c14524SMatt Arsenault } 1403931948bSMatt Arsenault 1413931948bSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, 1423931948bSMatt Arsenault StringRef AttrName) { 1433931948bSMatt Arsenault SmallPtrSet<Function *, 4> SeenFuncs; 1443931948bSMatt Arsenault 1453931948bSMatt Arsenault for (User *U : Intrin->users()) { 1463931948bSMatt Arsenault // CallInst is the only valid user for an intrinsic. 1473931948bSMatt Arsenault CallInst *CI = cast<CallInst>(U); 1483931948bSMatt Arsenault 1493931948bSMatt Arsenault Function *CallingFunction = CI->getParent()->getParent(); 1503931948bSMatt Arsenault if (SeenFuncs.insert(CallingFunction).second) 1513931948bSMatt Arsenault CallingFunction->addFnAttr(AttrName); 1523931948bSMatt Arsenault } 1533931948bSMatt Arsenault } 1543931948bSMatt Arsenault 1553931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 1563931948bSMatt Arsenault Module &M, 1573931948bSMatt Arsenault ArrayRef<StringRef[2]> IntrinsicToAttr) { 1583931948bSMatt Arsenault bool Changed = false; 1593931948bSMatt Arsenault 1603931948bSMatt Arsenault for (const StringRef *Arr : IntrinsicToAttr) { 1613931948bSMatt Arsenault if (Function *Fn = M.getFunction(Arr[0])) { 1623931948bSMatt Arsenault addAttrToCallers(Fn, Arr[1]); 1633931948bSMatt Arsenault Changed = true; 1643931948bSMatt Arsenault } 1653931948bSMatt Arsenault } 1663931948bSMatt Arsenault 1673931948bSMatt Arsenault return Changed; 1683931948bSMatt Arsenault } 1693931948bSMatt Arsenault 1703931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { 1713931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 1723931948bSMatt Arsenault 1733931948bSMatt Arsenault static const StringRef IntrinsicToAttr[][2] = { 1743931948bSMatt Arsenault // .x omitted 17543976df0SMatt Arsenault { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, 17643976df0SMatt Arsenault { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, 17743976df0SMatt Arsenault 17843976df0SMatt Arsenault { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, 17943976df0SMatt Arsenault { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, 18043976df0SMatt Arsenault 1813931948bSMatt Arsenault { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, 1823931948bSMatt Arsenault { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, 1833931948bSMatt Arsenault 1843931948bSMatt Arsenault // .x omitted 1853931948bSMatt Arsenault { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, 1863931948bSMatt Arsenault { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } 1873931948bSMatt Arsenault }; 1883931948bSMatt Arsenault 1893931948bSMatt Arsenault static const StringRef HSAIntrinsicToAttr[][2] = { 19048ab526fSMatt Arsenault { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, 191*8d718dcfSMatt Arsenault { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, 192*8d718dcfSMatt Arsenault { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" } 1933931948bSMatt Arsenault }; 1943931948bSMatt Arsenault 195d0799df7SMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 196d0799df7SMatt Arsenault // size is 1 for y/z. 197d0799df7SMatt Arsenault 1983931948bSMatt Arsenault // TODO: Intrinsics that require queue ptr. 1993931948bSMatt Arsenault 2003931948bSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are 2013931948bSMatt Arsenault // always initialized. 2023931948bSMatt Arsenault 2033931948bSMatt Arsenault bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); 20499c14524SMatt Arsenault if (TT.getOS() == Triple::AMDHSA) { 2053931948bSMatt Arsenault Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); 2063931948bSMatt Arsenault 20799c14524SMatt Arsenault for (Function &F : M) { 20899c14524SMatt Arsenault if (F.hasFnAttribute("amdgpu-queue-ptr")) 20999c14524SMatt Arsenault continue; 21099c14524SMatt Arsenault 21199c14524SMatt Arsenault if (hasAddrSpaceCast(F)) 21299c14524SMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 21399c14524SMatt Arsenault } 21499c14524SMatt Arsenault } 21599c14524SMatt Arsenault 2163931948bSMatt Arsenault return Changed; 2173931948bSMatt Arsenault } 2183931948bSMatt Arsenault 2193931948bSMatt Arsenault ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 2203931948bSMatt Arsenault return new AMDGPUAnnotateKernelFeatures(); 2213931948bSMatt Arsenault } 222