//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

/// Module pass that scans each function for uses of certain AMDGPU intrinsics
/// (and for addrspacecasts that need the queue pointer) and records matching
/// "amdgpu-*" string attributes on the calling functions, so later
/// calling-convention lowering knows which implicit arguments to preserve.
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  /// Return true if \p F contains an addrspacecast (as an instruction or
  /// inside a constant expression) that requires the queue pointer.
  static bool hasAddrSpaceCast(const Function &F);

  /// Add the string attribute \p AttrName to every function that calls
  /// \p Intrin.
  void addAttrToCallers(Function *Intrin, StringRef AttrName);

  /// For each {intrinsic-name, attribute-name} pair, annotate the callers of
  /// that intrinsic if it is present in \p M. Returns true if any intrinsic
  /// from the table was found.
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only attributes are added; no analyses are invalidated.
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  /// Return true if \p CE is an addrspacecast whose source address space
  /// requires the queue pointer.
  static bool visitConstantExpr(const ConstantExpr *CE);

  /// Walk \p EntryC and all constants reachable through its operands,
  /// returning true if any is a queue-pointer-requiring addrspacecast.
  /// \p ConstantExprVisited is shared across calls to avoid re-visiting.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
65 static bool castRequiresQueuePtr(unsigned SrcAS) { 66 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 67 } 68 69 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { 70 return castRequiresQueuePtr(ASC->getSrcAddressSpace()); 71 } 72 73 bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { 74 if (CE->getOpcode() == Instruction::AddrSpaceCast) { 75 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 76 return castRequiresQueuePtr(SrcAS); 77 } 78 79 return false; 80 } 81 82 bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 83 const Constant *EntryC, 84 SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { 85 86 if (!ConstantExprVisited.insert(EntryC).second) 87 return false; 88 89 SmallVector<const Constant *, 16> Stack; 90 Stack.push_back(EntryC); 91 92 while (!Stack.empty()) { 93 const Constant *C = Stack.pop_back_val(); 94 95 // Check this constant expression. 96 if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 97 if (visitConstantExpr(CE)) 98 return true; 99 } 100 101 // Visit all sub-expressions. 102 for (const Use &U : C->operands()) { 103 const auto *OpC = dyn_cast<Constant>(U); 104 if (!OpC) 105 continue; 106 107 if (!ConstantExprVisited.insert(OpC).second) 108 continue; 109 110 Stack.push_back(OpC); 111 } 112 } 113 114 return false; 115 } 116 117 // Return true if an addrspacecast is used that requires the queue ptr. 
118 bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { 119 SmallPtrSet<const Constant *, 8> ConstantExprVisited; 120 121 for (const BasicBlock &BB : F) { 122 for (const Instruction &I : BB) { 123 if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 124 if (castRequiresQueuePtr(ASC)) 125 return true; 126 } 127 128 for (const Use &U : I.operands()) { 129 const auto *OpC = dyn_cast<Constant>(U); 130 if (!OpC) 131 continue; 132 133 if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) 134 return true; 135 } 136 } 137 } 138 139 return false; 140 } 141 142 void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, 143 StringRef AttrName) { 144 SmallPtrSet<Function *, 4> SeenFuncs; 145 146 for (User *U : Intrin->users()) { 147 // CallInst is the only valid user for an intrinsic. 148 CallInst *CI = cast<CallInst>(U); 149 150 Function *CallingFunction = CI->getParent()->getParent(); 151 if (SeenFuncs.insert(CallingFunction).second) 152 CallingFunction->addFnAttr(AttrName); 153 } 154 } 155 156 bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 157 Module &M, 158 ArrayRef<StringRef[2]> IntrinsicToAttr) { 159 bool Changed = false; 160 161 for (const StringRef *Arr : IntrinsicToAttr) { 162 if (Function *Fn = M.getFunction(Arr[0])) { 163 addAttrToCallers(Fn, Arr[1]); 164 Changed = true; 165 } 166 } 167 168 return Changed; 169 } 170 171 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { 172 Triple TT(M.getTargetTriple()); 173 174 static const StringRef IntrinsicToAttr[][2] = { 175 // .x omitted 176 { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, 177 { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, 178 179 { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, 180 { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, 181 182 { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, 183 { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, 184 185 // .x omitted 186 { 
"llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, 187 { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } 188 }; 189 190 static const StringRef HSAIntrinsicToAttr[][2] = { 191 { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, 192 { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }, 193 { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" } 194 }; 195 196 // TODO: We should not add the attributes if the known compile time workgroup 197 // size is 1 for y/z. 198 199 // TODO: Intrinsics that require queue ptr. 200 201 // We do not need to note the x workitem or workgroup id because they are 202 // always initialized. 203 204 bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); 205 if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) { 206 Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); 207 208 for (Function &F : M) { 209 if (F.hasFnAttribute("amdgpu-queue-ptr")) 210 continue; 211 212 if (hasAddrSpaceCast(F)) 213 F.addFnAttr("amdgpu-queue-ptr"); 214 } 215 } 216 217 return Changed; 218 } 219 220 ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 221 return new AMDGPUAnnotateKernelFeatures(); 222 } 223