13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
23931948bSMatt Arsenault //
33931948bSMatt Arsenault //                     The LLVM Compiler Infrastructure
43931948bSMatt Arsenault //
53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source
63931948bSMatt Arsenault // License. See LICENSE.TXT for details.
73931948bSMatt Arsenault //
83931948bSMatt Arsenault //===----------------------------------------------------------------------===//
93931948bSMatt Arsenault //
103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics
113931948bSMatt Arsenault /// which will impact calling convention lowering.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16*3b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
173931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
183931948bSMatt Arsenault #include "llvm/IR/Module.h"
193931948bSMatt Arsenault 
203931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
213931948bSMatt Arsenault 
223931948bSMatt Arsenault using namespace llvm;
233931948bSMatt Arsenault 
namespace {

/// Module pass that scans for uses of intrinsics (and, on HSA, address space
/// casts) which affect calling convention lowering, and records them as
/// string function attributes on the calling functions.
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  // Return true if \p F contains an addrspacecast (directly or hidden in a
  // constant expression) that requires the queue pointer.
  static bool hasAddrSpaceCast(const Function &F);

  // Add \p AttrName to every function that calls the intrinsic \p Intrin.
  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  // Annotate callers for each (intrinsic name, attribute name) pair.
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID; // Pass identification.

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
  bool runOnModule(Module &M) override;
  const char *getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only attributes are added; no analyses are invalidated.
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  // Return true if \p CE is an addrspacecast that requires the queue pointer.
  static bool visitConstantExpr(const ConstantExpr *CE);
  // Worklist walk over \p EntryC and its constant operands, using
  // \p ConstantExprVisited to avoid revisiting shared subexpressions.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace
543931948bSMatt Arsenault 
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Exposed so the target can schedule this pass by ID.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
613931948bSMatt Arsenault 
6299c14524SMatt Arsenault 
6399c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
64*3b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(unsigned SrcAS) {
6599c14524SMatt Arsenault   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
6699c14524SMatt Arsenault }
6799c14524SMatt Arsenault 
68*3b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
69*3b2e2a59SMatt Arsenault   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
70*3b2e2a59SMatt Arsenault }
71*3b2e2a59SMatt Arsenault 
72*3b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
73*3b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
74*3b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
75*3b2e2a59SMatt Arsenault     return castRequiresQueuePtr(SrcAS);
76*3b2e2a59SMatt Arsenault   }
77*3b2e2a59SMatt Arsenault 
78*3b2e2a59SMatt Arsenault   return false;
79*3b2e2a59SMatt Arsenault }
80*3b2e2a59SMatt Arsenault 
81*3b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
82*3b2e2a59SMatt Arsenault   const Constant *EntryC,
83*3b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
84*3b2e2a59SMatt Arsenault 
85*3b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
86*3b2e2a59SMatt Arsenault     return false;
87*3b2e2a59SMatt Arsenault 
88*3b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
89*3b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
90*3b2e2a59SMatt Arsenault 
91*3b2e2a59SMatt Arsenault   while (!Stack.empty()) {
92*3b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
93*3b2e2a59SMatt Arsenault 
94*3b2e2a59SMatt Arsenault     // Check this constant expression.
95*3b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
96*3b2e2a59SMatt Arsenault       if (visitConstantExpr(CE))
97*3b2e2a59SMatt Arsenault         return true;
98*3b2e2a59SMatt Arsenault     }
99*3b2e2a59SMatt Arsenault 
100*3b2e2a59SMatt Arsenault     // Visit all sub-expressions.
101*3b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
102*3b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
103*3b2e2a59SMatt Arsenault       if (!OpC)
104*3b2e2a59SMatt Arsenault         continue;
105*3b2e2a59SMatt Arsenault 
106*3b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
107*3b2e2a59SMatt Arsenault         continue;
108*3b2e2a59SMatt Arsenault 
109*3b2e2a59SMatt Arsenault       Stack.push_back(OpC);
110*3b2e2a59SMatt Arsenault     }
111*3b2e2a59SMatt Arsenault   }
112*3b2e2a59SMatt Arsenault 
113*3b2e2a59SMatt Arsenault   return false;
114*3b2e2a59SMatt Arsenault }
115*3b2e2a59SMatt Arsenault 
11699c14524SMatt Arsenault // Return true if an addrspacecast is used that requires the queue ptr.
11799c14524SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
118*3b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
119*3b2e2a59SMatt Arsenault 
12099c14524SMatt Arsenault   for (const BasicBlock &BB : F) {
12199c14524SMatt Arsenault     for (const Instruction &I : BB) {
12299c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
12399c14524SMatt Arsenault         if (castRequiresQueuePtr(ASC))
12499c14524SMatt Arsenault           return true;
12599c14524SMatt Arsenault       }
126*3b2e2a59SMatt Arsenault 
127*3b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
128*3b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
129*3b2e2a59SMatt Arsenault         if (!OpC)
130*3b2e2a59SMatt Arsenault           continue;
131*3b2e2a59SMatt Arsenault 
132*3b2e2a59SMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
133*3b2e2a59SMatt Arsenault           return true;
134*3b2e2a59SMatt Arsenault       }
13599c14524SMatt Arsenault     }
13699c14524SMatt Arsenault   }
13799c14524SMatt Arsenault 
13899c14524SMatt Arsenault   return false;
13999c14524SMatt Arsenault }
1403931948bSMatt Arsenault 
1413931948bSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
1423931948bSMatt Arsenault                                                     StringRef AttrName) {
1433931948bSMatt Arsenault   SmallPtrSet<Function *, 4> SeenFuncs;
1443931948bSMatt Arsenault 
1453931948bSMatt Arsenault   for (User *U : Intrin->users()) {
1463931948bSMatt Arsenault     // CallInst is the only valid user for an intrinsic.
1473931948bSMatt Arsenault     CallInst *CI = cast<CallInst>(U);
1483931948bSMatt Arsenault 
1493931948bSMatt Arsenault     Function *CallingFunction = CI->getParent()->getParent();
1503931948bSMatt Arsenault     if (SeenFuncs.insert(CallingFunction).second)
1513931948bSMatt Arsenault       CallingFunction->addFnAttr(AttrName);
1523931948bSMatt Arsenault   }
1533931948bSMatt Arsenault }
1543931948bSMatt Arsenault 
1553931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
1563931948bSMatt Arsenault   Module &M,
1573931948bSMatt Arsenault   ArrayRef<StringRef[2]> IntrinsicToAttr) {
1583931948bSMatt Arsenault   bool Changed = false;
1593931948bSMatt Arsenault 
1603931948bSMatt Arsenault   for (const StringRef *Arr  : IntrinsicToAttr) {
1613931948bSMatt Arsenault     if (Function *Fn = M.getFunction(Arr[0])) {
1623931948bSMatt Arsenault       addAttrToCallers(Fn, Arr[1]);
1633931948bSMatt Arsenault       Changed = true;
1643931948bSMatt Arsenault     }
1653931948bSMatt Arsenault   }
1663931948bSMatt Arsenault 
1673931948bSMatt Arsenault   return Changed;
1683931948bSMatt Arsenault }
1693931948bSMatt Arsenault 
1703931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
1713931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
1723931948bSMatt Arsenault 
1733931948bSMatt Arsenault   static const StringRef IntrinsicToAttr[][2] = {
1743931948bSMatt Arsenault     // .x omitted
17543976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
17643976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },
17743976df0SMatt Arsenault 
17843976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
17943976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },
18043976df0SMatt Arsenault 
1813931948bSMatt Arsenault     { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
1823931948bSMatt Arsenault     { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
1833931948bSMatt Arsenault 
1843931948bSMatt Arsenault     // .x omitted
1853931948bSMatt Arsenault     { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
1863931948bSMatt Arsenault     { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
1873931948bSMatt Arsenault   };
1883931948bSMatt Arsenault 
1893931948bSMatt Arsenault   static const StringRef HSAIntrinsicToAttr[][2] = {
19048ab526fSMatt Arsenault     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
19148ab526fSMatt Arsenault     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }
1923931948bSMatt Arsenault   };
1933931948bSMatt Arsenault 
194d0799df7SMatt Arsenault   // TODO: We should not add the attributes if the known compile time workgroup
195d0799df7SMatt Arsenault   // size is 1 for y/z.
196d0799df7SMatt Arsenault 
1973931948bSMatt Arsenault   // TODO: Intrinsics that require queue ptr.
1983931948bSMatt Arsenault 
1993931948bSMatt Arsenault   // We do not need to note the x workitem or workgroup id because they are
2003931948bSMatt Arsenault   // always initialized.
2013931948bSMatt Arsenault 
2023931948bSMatt Arsenault   bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
20399c14524SMatt Arsenault   if (TT.getOS() == Triple::AMDHSA) {
2043931948bSMatt Arsenault     Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
2053931948bSMatt Arsenault 
20699c14524SMatt Arsenault     for (Function &F : M) {
20799c14524SMatt Arsenault       if (F.hasFnAttribute("amdgpu-queue-ptr"))
20899c14524SMatt Arsenault         continue;
20999c14524SMatt Arsenault 
21099c14524SMatt Arsenault       if (hasAddrSpaceCast(F))
21199c14524SMatt Arsenault         F.addFnAttr("amdgpu-queue-ptr");
21299c14524SMatt Arsenault     }
21399c14524SMatt Arsenault   }
21499c14524SMatt Arsenault 
2153931948bSMatt Arsenault   return Changed;
2163931948bSMatt Arsenault }
2173931948bSMatt Arsenault 
// Public factory used by the AMDGPU target to construct this pass for the
// pass pipeline.
ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
221