13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
23931948bSMatt Arsenault //
33931948bSMatt Arsenault //                     The LLVM Compiler Infrastructure
43931948bSMatt Arsenault //
53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source
63931948bSMatt Arsenault // License. See LICENSE.TXT for details.
73931948bSMatt Arsenault //
83931948bSMatt Arsenault //===----------------------------------------------------------------------===//
93931948bSMatt Arsenault //
103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics
113931948bSMatt Arsenault /// which will impact calling convention lowering.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16*2ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h"
173b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
183931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
193931948bSMatt Arsenault #include "llvm/IR/Module.h"
203931948bSMatt Arsenault 
213931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
223931948bSMatt Arsenault 
233931948bSMatt Arsenault using namespace llvm;
243931948bSMatt Arsenault 
253931948bSMatt Arsenault namespace {
263931948bSMatt Arsenault 
273931948bSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public ModulePass {
283931948bSMatt Arsenault private:
2999c14524SMatt Arsenault   static bool hasAddrSpaceCast(const Function &F);
3099c14524SMatt Arsenault 
313931948bSMatt Arsenault   void addAttrToCallers(Function *Intrin, StringRef AttrName);
323931948bSMatt Arsenault   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
333931948bSMatt Arsenault 
343931948bSMatt Arsenault public:
353931948bSMatt Arsenault   static char ID;
363931948bSMatt Arsenault 
373931948bSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
383931948bSMatt Arsenault   bool runOnModule(Module &M) override;
393931948bSMatt Arsenault   const char *getPassName() const override {
403931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
413931948bSMatt Arsenault   }
423931948bSMatt Arsenault 
433931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
443931948bSMatt Arsenault     AU.setPreservesAll();
453931948bSMatt Arsenault     ModulePass::getAnalysisUsage(AU);
463931948bSMatt Arsenault   }
473b2e2a59SMatt Arsenault 
483b2e2a59SMatt Arsenault   static bool visitConstantExpr(const ConstantExpr *CE);
493b2e2a59SMatt Arsenault   static bool visitConstantExprsRecursively(
503b2e2a59SMatt Arsenault     const Constant *EntryC,
513b2e2a59SMatt Arsenault     SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
523931948bSMatt Arsenault };
533931948bSMatt Arsenault 
543931948bSMatt Arsenault }
553931948bSMatt Arsenault 
563931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0;
573931948bSMatt Arsenault 
583931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
593931948bSMatt Arsenault 
6099c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
613931948bSMatt Arsenault                 "Add AMDGPU function attributes", false, false)
623931948bSMatt Arsenault 
6399c14524SMatt Arsenault 
6499c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
653b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(unsigned SrcAS) {
6699c14524SMatt Arsenault   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
6799c14524SMatt Arsenault }
6899c14524SMatt Arsenault 
693b2e2a59SMatt Arsenault static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
703b2e2a59SMatt Arsenault   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
713b2e2a59SMatt Arsenault }
723b2e2a59SMatt Arsenault 
733b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
743b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
753b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
763b2e2a59SMatt Arsenault     return castRequiresQueuePtr(SrcAS);
773b2e2a59SMatt Arsenault   }
783b2e2a59SMatt Arsenault 
793b2e2a59SMatt Arsenault   return false;
803b2e2a59SMatt Arsenault }
813b2e2a59SMatt Arsenault 
823b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
833b2e2a59SMatt Arsenault   const Constant *EntryC,
843b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
853b2e2a59SMatt Arsenault 
863b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
873b2e2a59SMatt Arsenault     return false;
883b2e2a59SMatt Arsenault 
893b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
903b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
913b2e2a59SMatt Arsenault 
923b2e2a59SMatt Arsenault   while (!Stack.empty()) {
933b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
943b2e2a59SMatt Arsenault 
953b2e2a59SMatt Arsenault     // Check this constant expression.
963b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
973b2e2a59SMatt Arsenault       if (visitConstantExpr(CE))
983b2e2a59SMatt Arsenault         return true;
993b2e2a59SMatt Arsenault     }
1003b2e2a59SMatt Arsenault 
1013b2e2a59SMatt Arsenault     // Visit all sub-expressions.
1023b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
1033b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
1043b2e2a59SMatt Arsenault       if (!OpC)
1053b2e2a59SMatt Arsenault         continue;
1063b2e2a59SMatt Arsenault 
1073b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
1083b2e2a59SMatt Arsenault         continue;
1093b2e2a59SMatt Arsenault 
1103b2e2a59SMatt Arsenault       Stack.push_back(OpC);
1113b2e2a59SMatt Arsenault     }
1123b2e2a59SMatt Arsenault   }
1133b2e2a59SMatt Arsenault 
1143b2e2a59SMatt Arsenault   return false;
1153b2e2a59SMatt Arsenault }
1163b2e2a59SMatt Arsenault 
11799c14524SMatt Arsenault // Return true if an addrspacecast is used that requires the queue ptr.
11899c14524SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
1193b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
1203b2e2a59SMatt Arsenault 
12199c14524SMatt Arsenault   for (const BasicBlock &BB : F) {
12299c14524SMatt Arsenault     for (const Instruction &I : BB) {
12399c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
12499c14524SMatt Arsenault         if (castRequiresQueuePtr(ASC))
12599c14524SMatt Arsenault           return true;
12699c14524SMatt Arsenault       }
1273b2e2a59SMatt Arsenault 
1283b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
1293b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
1303b2e2a59SMatt Arsenault         if (!OpC)
1313b2e2a59SMatt Arsenault           continue;
1323b2e2a59SMatt Arsenault 
1333b2e2a59SMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
1343b2e2a59SMatt Arsenault           return true;
1353b2e2a59SMatt Arsenault       }
13699c14524SMatt Arsenault     }
13799c14524SMatt Arsenault   }
13899c14524SMatt Arsenault 
13999c14524SMatt Arsenault   return false;
14099c14524SMatt Arsenault }
1413931948bSMatt Arsenault 
1423931948bSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
1433931948bSMatt Arsenault                                                     StringRef AttrName) {
1443931948bSMatt Arsenault   SmallPtrSet<Function *, 4> SeenFuncs;
1453931948bSMatt Arsenault 
1463931948bSMatt Arsenault   for (User *U : Intrin->users()) {
1473931948bSMatt Arsenault     // CallInst is the only valid user for an intrinsic.
1483931948bSMatt Arsenault     CallInst *CI = cast<CallInst>(U);
1493931948bSMatt Arsenault 
1503931948bSMatt Arsenault     Function *CallingFunction = CI->getParent()->getParent();
1513931948bSMatt Arsenault     if (SeenFuncs.insert(CallingFunction).second)
1523931948bSMatt Arsenault       CallingFunction->addFnAttr(AttrName);
1533931948bSMatt Arsenault   }
1543931948bSMatt Arsenault }
1553931948bSMatt Arsenault 
1563931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
1573931948bSMatt Arsenault   Module &M,
1583931948bSMatt Arsenault   ArrayRef<StringRef[2]> IntrinsicToAttr) {
1593931948bSMatt Arsenault   bool Changed = false;
1603931948bSMatt Arsenault 
1613931948bSMatt Arsenault   for (const StringRef *Arr  : IntrinsicToAttr) {
1623931948bSMatt Arsenault     if (Function *Fn = M.getFunction(Arr[0])) {
1633931948bSMatt Arsenault       addAttrToCallers(Fn, Arr[1]);
1643931948bSMatt Arsenault       Changed = true;
1653931948bSMatt Arsenault     }
1663931948bSMatt Arsenault   }
1673931948bSMatt Arsenault 
1683931948bSMatt Arsenault   return Changed;
1693931948bSMatt Arsenault }
1703931948bSMatt Arsenault 
1713931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
1723931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
1733931948bSMatt Arsenault 
1743931948bSMatt Arsenault   static const StringRef IntrinsicToAttr[][2] = {
1753931948bSMatt Arsenault     // .x omitted
17643976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
17743976df0SMatt Arsenault     { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },
17843976df0SMatt Arsenault 
17943976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
18043976df0SMatt Arsenault     { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },
18143976df0SMatt Arsenault 
1823931948bSMatt Arsenault     { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
1833931948bSMatt Arsenault     { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
1843931948bSMatt Arsenault 
1853931948bSMatt Arsenault     // .x omitted
1863931948bSMatt Arsenault     { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
1873931948bSMatt Arsenault     { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
1883931948bSMatt Arsenault   };
1893931948bSMatt Arsenault 
1903931948bSMatt Arsenault   static const StringRef HSAIntrinsicToAttr[][2] = {
19148ab526fSMatt Arsenault     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
1928d718dcfSMatt Arsenault     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
1938d718dcfSMatt Arsenault     { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
1943931948bSMatt Arsenault   };
1953931948bSMatt Arsenault 
196d0799df7SMatt Arsenault   // TODO: We should not add the attributes if the known compile time workgroup
197d0799df7SMatt Arsenault   // size is 1 for y/z.
198d0799df7SMatt Arsenault 
1993931948bSMatt Arsenault   // TODO: Intrinsics that require queue ptr.
2003931948bSMatt Arsenault 
2013931948bSMatt Arsenault   // We do not need to note the x workitem or workgroup id because they are
2023931948bSMatt Arsenault   // always initialized.
2033931948bSMatt Arsenault 
2043931948bSMatt Arsenault   bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
20599c14524SMatt Arsenault   if (TT.getOS() == Triple::AMDHSA) {
2063931948bSMatt Arsenault     Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
2073931948bSMatt Arsenault 
20899c14524SMatt Arsenault     for (Function &F : M) {
20999c14524SMatt Arsenault       if (F.hasFnAttribute("amdgpu-queue-ptr"))
21099c14524SMatt Arsenault         continue;
21199c14524SMatt Arsenault 
21299c14524SMatt Arsenault       if (hasAddrSpaceCast(F))
21399c14524SMatt Arsenault         F.addFnAttr("amdgpu-queue-ptr");
21499c14524SMatt Arsenault     }
21599c14524SMatt Arsenault   }
21699c14524SMatt Arsenault 
2173931948bSMatt Arsenault   return Changed;
2183931948bSMatt Arsenault }
2193931948bSMatt Arsenault 
2203931948bSMatt Arsenault ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
2213931948bSMatt Arsenault   return new AMDGPUAnnotateKernelFeatures();
2223931948bSMatt Arsenault }
223