//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  const TargetMachine *TM;
  static bool hasAddrSpaceCast(const Function &F);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
                               ModulePass(ID), TM(TM_) {}
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

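// Return true if any constant expression reachable from EntryC is an
// addrspacecast that requires the queue ptr. Constants already present in
// ConstantExprVisited are skipped.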
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
          return true;
      }
    }
  }

  return false;
}

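// Add AttrName to every function that contains a call to the intrinsic Intrin.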
void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

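// For each {intrinsic name, attribute name} pair, mark all callers of the
// intrinsic with the attribute. Returns true if any of the listed intrinsics
// were found in the module.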
bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

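// Annotate functions based on the intrinsics they call. On AMDHSA and Mesa3D,
// also add "amdgpu-queue-ptr" to functions whose flat address space casts
// require the queue pointer, unless the subtarget has aperture registers.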
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      bool HasApertureRegs =
        TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
  return new AMDGPUAnnotateKernelFeatures(TM);
}