//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
  const TargetMachine *TM;
  static bool hasAddrSpaceCast(const Function &F);

  void addAttrToCallers(Function *Intrin, StringRef AttrName);
  bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
    ModulePass(ID), TM(TM_) {}

  bool runOnModule(Module &M) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// Return true if an addrspacecast is used that requires the queue ptr.
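//
// A minimal illustration (an assumption, not taken from this file): with an
// address-space mapping where local (LDS) is addrspace(3) and flat is
// addrspace(4), a cast such as
//
//   %flat = addrspacecast i32 addrspace(3)* %lds to i32 addrspace(4)*
//
// in the function body makes this return true, because on subtargets without
// aperture registers the segment aperture base is read via the queue ptr.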
bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC))
          return true;
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
          return true;
      }
    }
  }

  return false;
}

void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
                                                    StringRef AttrName) {
  SmallPtrSet<Function *, 4> SeenFuncs;

  for (User *U : Intrin->users()) {
    // CallInst is the only valid user for an intrinsic.
    CallInst *CI = cast<CallInst>(U);

    Function *CallingFunction = CI->getParent()->getParent();
    if (SeenFuncs.insert(CallingFunction).second)
      CallingFunction->addFnAttr(AttrName);
  }
}

bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
  Module &M,
  ArrayRef<StringRef[2]> IntrinsicToAttr) {
  bool Changed = false;

  for (const StringRef *Arr : IntrinsicToAttr) {
    if (Function *Fn = M.getFunction(Arr[0])) {
      addAttrToCallers(Fn, Arr[1]);
      Changed = true;
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
  Triple TT(M.getTargetTriple());

  static const StringRef IntrinsicToAttr[][2] = {
    // .x omitted
    { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" },
    { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" },

    { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" },
    { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" },

    { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
    { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },

    // .x omitted
    { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
    { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
  };

  static const StringRef HSAIntrinsicToAttr[][2] = {
    { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
    { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
    { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
    { "llvm.trap", "amdgpu-queue-ptr" },
    { "llvm.debugtrap", "amdgpu-queue-ptr" }
  };

  // TODO: We should not add the attributes if the known compile time workgroup
  // size is 1 for y/z.

  // TODO: Intrinsics that require queue ptr.

  // We do not need to note the x workitem or workgroup id because they are
  // always initialized.

  bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
  if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
    Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);

    for (Function &F : M) {
      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        continue;

      bool HasApertureRegs =
        TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
      if (!HasApertureRegs && hasAddrSpaceCast(F))
        F.addFnAttr("amdgpu-queue-ptr");
    }
  }

  return Changed;
}

ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
  return new AMDGPUAnnotateKernelFeatures(TM);
}
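
// A usage sketch, illustrative only: scheduling this pass with the legacy
// pass manager. 'TM' is assumed to be an AMDGPU TargetMachine owned by the
// caller and 'M' the module to annotate; neither name comes from this file.
//
//   legacy::PassManager PM;
//   PM.add(createAMDGPUAnnotateKernelFeaturesPass(TM));
//   bool Changed = PM.run(M);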