//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  AMDGPUAS AS;
  static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {}
  bool runOnModule(Module &M) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

}

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
                                                    AMDGPUAS AS) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
          return true;
      }
    }
  }

  return false;
}

void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());
  AS = AMDGPU::getAMDGPUAS(M);

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.
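  //
  // Illustrative sketch of the intended effect (not normative; @kern is a
  // hypothetical function name used only for this example). Given:
  //
  //   define amdgpu_kernel void @kern() {
  //     %id = call i32 @llvm.amdgcn.workitem.id.y()
  //     ...
  //   }
  //
  // the IntrinsicToAttr table above causes @kern to be annotated with the
  // "amdgpu-work-item-id-y" function attribute, which later calling
  // convention lowering consults. Similarly, on AMDHSA/Mesa3D a function
  // containing an addrspacecast from the local or private address space to
  // flat is annotated with "amdgpu-queue-ptr" unless the subtarget has
  // aperture registers (see the loop below).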

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
      bool HasApertureRegs = TPC && TPC->getTM<TargetMachine>()
                                        .getSubtarget<AMDGPUSubtarget>(F)
                                        .hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}