//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  static bool hasAddrSpaceCast(const Function &F);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
  bool runOnModule(Module &M) override;
  const char *getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

}

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
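// Casting a group (local) or private pointer to flat requires the aperture
// base addresses, which on AMDHSA are reached through the queue pointer.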
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
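// This looks at addrspacecast instructions as well as constant expressions
// reachable from instruction operands, since casts of global addresses
// (e.g. LDS variables) are typically folded into ConstantExprs.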
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
          return true;
      }
    }
  }

  return false;
}

void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      if (hasAddrSpaceCast(F))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}