1 //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file This pass adds target attributes to functions which use intrinsics
11 /// which will impact calling convention lowering.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "llvm/IR/Instructions.h"
17 #include "llvm/IR/Module.h"
18 
19 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
20 
21 using namespace llvm;
22 
23 namespace {
24 
25 class AMDGPUAnnotateKernelFeatures : public ModulePass {
26 private:
27   static bool hasAddrSpaceCast(const Function &F);
28 
29   void addAttrToCallers(Function *Intrin, StringRef AttrName);
30   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
31 
32 public:
33   static char ID;
34 
35   AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
36   bool runOnModule(Module &M) override;
37   const char *getPassName() const override {
38     return "AMDGPU Annotate Kernel Features";
39   }
40 
41   void getAnalysisUsage(AnalysisUsage &AU) const override {
42     AU.setPreservesAll();
43     ModulePass::getAnalysisUsage(AU);
44   }
45 };
46 
47 }
48 
49 char AMDGPUAnnotateKernelFeatures::ID = 0;
50 
51 char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
52 
53 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
54                 "Add AMDGPU function attributes", false, false)
55 
56 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
57   unsigned SrcAS = ASC->getSrcAddressSpace();
58 
59   // The queue ptr is only needed when casting to flat, not from it.
60   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
61 }
62 
63 // Return true if an addrspacecast is used that requires the queue ptr.
64 bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
65   for (const BasicBlock &BB : F) {
66     for (const Instruction &I : BB) {
67       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
68         if (castRequiresQueuePtr(ASC))
69           return true;
70       }
71     }
72   }
73 
74   return false;
75 }
76 
77 void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
78                                                     StringRef AttrName) {
79   SmallPtrSet<Function *, 4> SeenFuncs;
80 
81   for (User *U : Intrin->users()) {
82     // CallInst is the only valid user for an intrinsic.
83     CallInst *CI = cast<CallInst>(U);
84 
85     Function *CallingFunction = CI->getParent()->getParent();
86     if (SeenFuncs.insert(CallingFunction).second)
87       CallingFunction->addFnAttr(AttrName);
88   }
89 }
90 
91 bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
92   Module &M,
93   ArrayRef<StringRef[2]> IntrinsicToAttr) {
94   bool Changed = false;
95 
96   for (const StringRef *Arr  : IntrinsicToAttr) {
97     if (Function *Fn = M.getFunction(Arr[0])) {
98       addAttrToCallers(Fn, Arr[1]);
99       Changed = true;
100     }
101   }
102 
103   return Changed;
104 }
105 
106 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
107   Triple TT(M.getTargetTriple());
108 
109   static const StringRef IntrinsicToAttr[][2] = {
110     // .x omitted
111     { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
112     { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },
113 
114     { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
115     { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },
116 
117     { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
118     { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
119 
120     // .x omitted
121     { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
122     { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
123   };
124 
125   static const StringRef HSAIntrinsicToAttr[][2] = {
126     { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
127     { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }
128   };
129 
130   // TODO: We should not add the attributes if the known compile time workgroup
131   // size is 1 for y/z.
132 
133   // TODO: Intrinsics that require queue ptr.
134 
135   // We do not need to note the x workitem or workgroup id because they are
136   // always initialized.
137 
138   bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
139   if (TT.getOS() == Triple::AMDHSA) {
140     Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
141 
142     for (Function &F : M) {
143       if (F.hasFnAttribute("amdgpu-queue-ptr"))
144         continue;
145 
146       if (hasAddrSpaceCast(F))
147         F.addFnAttr("amdgpu-queue-ptr");
148     }
149   }
150 
151   return Changed;
152 }
153 
154 ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
155   return new AMDGPUAnnotateKernelFeatures();
156 }
157