//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  const TargetMachine *TM;
  AMDGPUAS AS;

  static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]> IntrinsicToAttr);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
    ModulePass(ID), TM(TM_) {}

  bool runOnModule(Module &M) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}
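
// Illustrative note (not from the original source): a cast *into* the flat
// address space from the local (LDS) or private (scratch) segment, e.g.
//
//   %flat = addrspacecast i32 addrspace(3)* %lds.ptr to i32 addrspace(4)*
//
// needs the queue ptr because, without aperture registers, the segment
// aperture bases are read through it; the reverse cast does not. The concrete
// address-space numbers above are an assumption for illustration only; the
// real numbering comes from the AMDGPUAS struct queried in runOnModule.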

// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
                                                    AMDGPUAS AS) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
          return true;
      }
    }
  }

  return false;
}

void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}
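
// Illustrative sketch (not from the original source) of the mapping applied
// in runOnModule below: given input such as
//
//   define amdgpu_kernel void @kernel() {
//     %y = call i32 @llvm.amdgcn.workitem.id.y()
//     ret void
//   }
//
// addAttrToCallers marks @kernel with "amdgpu-work-item-id-y", which calling
// convention lowering can then consult when deciding which inputs the kernel
// needs.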

bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());
  AS = AMDGPU::getAMDGPUAS(M);

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      bool HasApertureRegs =
        TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(
    const TargetMachine *TM) {
  return new AMDGPUAnnotateKernelFeatures(TM);
}
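
// Usage note (an assumption about typical backend setup, not part of this
// file): the pass is created with the target machine so that per-function
// subtarget features can be queried, e.g.
//
//   PM.add(createAMDGPUAnnotateKernelFeaturesPass(&TM));
//
// A null TM is tolerated: runOnModule only checks hasApertureRegs() when TM
// is non-null, and otherwise conservatively assumes the queue ptr is needed
// for any qualifying addrspacecast.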