13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
23931948bSMatt Arsenault //
33931948bSMatt Arsenault //                     The LLVM Compiler Infrastructure
43931948bSMatt Arsenault //
53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source
63931948bSMatt Arsenault // License. See LICENSE.TXT for details.
73931948bSMatt Arsenault //
83931948bSMatt Arsenault //===----------------------------------------------------------------------===//
93931948bSMatt Arsenault //
103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics
113931948bSMatt Arsenault /// which will impact calling convention lowering.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
163b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
173931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
183931948bSMatt Arsenault #include "llvm/IR/Module.h"
193931948bSMatt Arsenault 
203931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
213931948bSMatt Arsenault 
223931948bSMatt Arsenault using namespace llvm;
233931948bSMatt Arsenault 
243931948bSMatt Arsenault namespace {
253931948bSMatt Arsenault 
263931948bSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public ModulePass {
273931948bSMatt Arsenault private:
2899c14524SMatt Arsenault   static bool hasAddrSpaceCast(const Function &F);
2999c14524SMatt Arsenault 
303931948bSMatt Arsenault   void addAttrToCallers(Function *Intrin, StringRef AttrName);
313931948bSMatt Arsenault   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
323931948bSMatt Arsenault 
333931948bSMatt Arsenault public:
343931948bSMatt Arsenault   static char ID;
353931948bSMatt Arsenault 
363931948bSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
373931948bSMatt Arsenault   bool runOnModule(Module &M) override;
383931948bSMatt Arsenault   const char *getPassName() const override {
393931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
403931948bSMatt Arsenault   }
413931948bSMatt Arsenault 
423931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
433931948bSMatt Arsenault     AU.setPreservesAll();
443931948bSMatt Arsenault     ModulePass::getAnalysisUsage(AU);
453931948bSMatt Arsenault   }
463b2e2a59SMatt Arsenault 
473b2e2a59SMatt Arsenault   static bool visitConstantExpr(const ConstantExpr *CE);
483b2e2a59SMatt Arsenault   static bool visitConstantExprsRecursively(
493b2e2a59SMatt Arsenault     const Constant *EntryC,
503b2e2a59SMatt Arsenault     SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
513931948bSMatt Arsenault };
523931948bSMatt Arsenault 
533931948bSMatt Arsenault }
543931948bSMatt Arsenault 
553931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0;
563931948bSMatt Arsenault 
573931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
583931948bSMatt Arsenault 
5999c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
603931948bSMatt Arsenault                 "Add AMDGPU function attributes", false, false)
613931948bSMatt Arsenault 
6299c14524SMatt Arsenault 
6399c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
643b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(unsigned SrcAS) {
6599c14524SMatt Arsenault   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
6699c14524SMatt Arsenault }
6799c14524SMatt Arsenault 
683b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
693b2e2a59SMatt Arsenault   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
703b2e2a59SMatt Arsenault }
713b2e2a59SMatt Arsenault 
723b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
733b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
743b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
753b2e2a59SMatt Arsenault     return castRequiresQueuePtr(SrcAS);
763b2e2a59SMatt Arsenault   }
773b2e2a59SMatt Arsenault 
783b2e2a59SMatt Arsenault   return false;
793b2e2a59SMatt Arsenault }
803b2e2a59SMatt Arsenault 
813b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
823b2e2a59SMatt Arsenault   const Constant *EntryC,
833b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
843b2e2a59SMatt Arsenault 
853b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
863b2e2a59SMatt Arsenault     return false;
873b2e2a59SMatt Arsenault 
883b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
893b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
903b2e2a59SMatt Arsenault 
913b2e2a59SMatt Arsenault   while (!Stack.empty()) {
923b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
933b2e2a59SMatt Arsenault 
943b2e2a59SMatt Arsenault     // Check this constant expression.
953b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
963b2e2a59SMatt Arsenault       if (visitConstantExpr(CE))
973b2e2a59SMatt Arsenault         return true;
983b2e2a59SMatt Arsenault     }
993b2e2a59SMatt Arsenault 
1003b2e2a59SMatt Arsenault     // Visit all sub-expressions.
1013b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
1023b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
1033b2e2a59SMatt Arsenault       if (!OpC)
1043b2e2a59SMatt Arsenault         continue;
1053b2e2a59SMatt Arsenault 
1063b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
1073b2e2a59SMatt Arsenault         continue;
1083b2e2a59SMatt Arsenault 
1093b2e2a59SMatt Arsenault       Stack.push_back(OpC);
1103b2e2a59SMatt Arsenault     }
1113b2e2a59SMatt Arsenault   }
1123b2e2a59SMatt Arsenault 
1133b2e2a59SMatt Arsenault   return false;
1143b2e2a59SMatt Arsenault }
1153b2e2a59SMatt Arsenault 
11699c14524SMatt Arsenault // Return true if an addrspacecast is used that requires the queue ptr.
11799c14524SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
1183b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
1193b2e2a59SMatt Arsenault 
12099c14524SMatt Arsenault   for (const BasicBlock &BB : F) {
12199c14524SMatt Arsenault     for (const Instruction &I : BB) {
12299c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
12399c14524SMatt Arsenault         if (castRequiresQueuePtr(ASC))
12499c14524SMatt Arsenault           return true;
12599c14524SMatt Arsenault       }
1263b2e2a59SMatt Arsenault 
1273b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
1283b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
1293b2e2a59SMatt Arsenault         if (!OpC)
1303b2e2a59SMatt Arsenault           continue;
1313b2e2a59SMatt Arsenault 
1323b2e2a59SMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
1333b2e2a59SMatt Arsenault           return true;
1343b2e2a59SMatt Arsenault       }
13599c14524SMatt Arsenault     }
13699c14524SMatt Arsenault   }
13799c14524SMatt Arsenault 
13899c14524SMatt Arsenault   return false;
13999c14524SMatt Arsenault }
1403931948bSMatt Arsenault 
1413931948bSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
1423931948bSMatt Arsenault                                                     StringRef AttrName) {
1433931948bSMatt Arsenault   SmallPtrSet<Function *, 4> SeenFuncs;
1443931948bSMatt Arsenault 
1453931948bSMatt Arsenault   for (User *U : Intrin->users()) {
1463931948bSMatt Arsenault     // CallInst is the only valid user for an intrinsic.
1473931948bSMatt Arsenault     CallInst *CI = cast<CallInst>(U);
1483931948bSMatt Arsenault 
1493931948bSMatt Arsenault     Function *CallingFunction = CI->getParent()->getParent();
1503931948bSMatt Arsenault     if (SeenFuncs.insert(CallingFunction).second)
1513931948bSMatt Arsenault       CallingFunction->addFnAttr(AttrName);
1523931948bSMatt Arsenault   }
1533931948bSMatt Arsenault }
1543931948bSMatt Arsenault 
1553931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
1563931948bSMatt Arsenault   Module &M,
1573931948bSMatt Arsenault   ArrayRef<StringRef[2]> IntrinsicToAttr) {
1583931948bSMatt Arsenault   bool Changed = false;
1593931948bSMatt Arsenault 
1603931948bSMatt Arsenault   for (const StringRef *Arr  : IntrinsicToAttr) {
1613931948bSMatt Arsenault     if (Function *Fn = M.getFunction(Arr[0])) {
1623931948bSMatt Arsenault       addAttrToCallers(Fn, Arr[1]);
1633931948bSMatt Arsenault       Changed = true;
1643931948bSMatt Arsenault     }
1653931948bSMatt Arsenault   }
1663931948bSMatt Arsenault 
1673931948bSMatt Arsenault   return Changed;
1683931948bSMatt Arsenault }
1693931948bSMatt Arsenault 
1703931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
1713931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
1723931948bSMatt Arsenault 
1733931948bSMatt Arsenault   static const StringRef IntrinsicToAttr[][2] = {
1743931948bSMatt Arsenault     // .x omitted
17543976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
17643976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },
17743976df0SMatt Arsenault 
17843976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
17943976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },
18043976df0SMatt Arsenault 
1813931948bSMatt Arsenault     { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
1823931948bSMatt Arsenault     { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
1833931948bSMatt Arsenault 
1843931948bSMatt Arsenault     // .x omitted
1853931948bSMatt Arsenault     { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
1863931948bSMatt Arsenault     { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
1873931948bSMatt Arsenault   };
1883931948bSMatt Arsenault 
1893931948bSMatt Arsenault   static const StringRef HSAIntrinsicToAttr[][2] = {
19048ab526fSMatt Arsenault     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
191*8d718dcfSMatt Arsenault     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
192*8d718dcfSMatt Arsenault     { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
1933931948bSMatt Arsenault   };
1943931948bSMatt Arsenault 
195d0799df7SMatt Arsenault   // TODO: We should not add the attributes if the known compile time workgroup
196d0799df7SMatt Arsenault   // size is 1 for y/z.
197d0799df7SMatt Arsenault 
1983931948bSMatt Arsenault   // TODO: Intrinsics that require queue ptr.
1993931948bSMatt Arsenault 
2003931948bSMatt Arsenault   // We do not need to note the x workitem or workgroup id because they are
2013931948bSMatt Arsenault   // always initialized.
2023931948bSMatt Arsenault 
2033931948bSMatt Arsenault   bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
20499c14524SMatt Arsenault   if (TT.getOS() == Triple::AMDHSA) {
2053931948bSMatt Arsenault     Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
2063931948bSMatt Arsenault 
20799c14524SMatt Arsenault     for (Function &F : M) {
20899c14524SMatt Arsenault       if (F.hasFnAttribute("amdgpu-queue-ptr"))
20999c14524SMatt Arsenault         continue;
21099c14524SMatt Arsenault 
21199c14524SMatt Arsenault       if (hasAddrSpaceCast(F))
21299c14524SMatt Arsenault         F.addFnAttr("amdgpu-queue-ptr");
21399c14524SMatt Arsenault     }
21499c14524SMatt Arsenault   }
21599c14524SMatt Arsenault 
2163931948bSMatt Arsenault   return Changed;
2173931948bSMatt Arsenault }
2183931948bSMatt Arsenault 
2193931948bSMatt Arsenault ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
2203931948bSMatt Arsenault   return new AMDGPUAnnotateKernelFeatures();
2213931948bSMatt Arsenault }
222