1 //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file This pass adds target attributes to functions which use intrinsics 11 /// which will impact calling convention lowering. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/Module.h" 18 19 #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 20 21 using namespace llvm; 22 23 namespace { 24 25 class AMDGPUAnnotateKernelFeatures : public ModulePass { 26 private: 27 static bool hasAddrSpaceCast(const Function &F); 28 29 void addAttrToCallers(Function *Intrin, StringRef AttrName); 30 bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); 31 32 public: 33 static char ID; 34 35 AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { } 36 bool runOnModule(Module &M) override; 37 const char *getPassName() const override { 38 return "AMDGPU Annotate Kernel Features"; 39 } 40 41 void getAnalysisUsage(AnalysisUsage &AU) const override { 42 AU.setPreservesAll(); 43 ModulePass::getAnalysisUsage(AU); 44 } 45 }; 46 47 } 48 49 char AMDGPUAnnotateKernelFeatures::ID = 0; 50 51 char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 52 53 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 54 "Add AMDGPU function attributes", false, false) 55 56 static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { 57 unsigned SrcAS = ASC->getSrcAddressSpace(); 58 59 // The queue ptr is only needed when casting to flat, not from it. 60 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 61 } 62 63 // Return true if an addrspacecast is used that requires the queue ptr. 64 bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { 65 for (const BasicBlock &BB : F) { 66 for (const Instruction &I : BB) { 67 if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 68 if (castRequiresQueuePtr(ASC)) 69 return true; 70 } 71 } 72 } 73 74 return false; 75 } 76 77 void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, 78 StringRef AttrName) { 79 SmallPtrSet<Function *, 4> SeenFuncs; 80 81 for (User *U : Intrin->users()) { 82 // CallInst is the only valid user for an intrinsic. 83 CallInst *CI = cast<CallInst>(U); 84 85 Function *CallingFunction = CI->getParent()->getParent(); 86 if (SeenFuncs.insert(CallingFunction).second) 87 CallingFunction->addFnAttr(AttrName); 88 } 89 } 90 91 bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 92 Module &M, 93 ArrayRef<StringRef[2]> IntrinsicToAttr) { 94 bool Changed = false; 95 96 for (const StringRef *Arr : IntrinsicToAttr) { 97 if (Function *Fn = M.getFunction(Arr[0])) { 98 addAttrToCallers(Fn, Arr[1]); 99 Changed = true; 100 } 101 } 102 103 return Changed; 104 } 105 106 bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { 107 Triple TT(M.getTargetTriple()); 108 109 static const StringRef IntrinsicToAttr[][2] = { 110 // .x omitted 111 { "llvm.amdgcn.workitem.id.y", "amdgpu-work-item-id-y" }, 112 { "llvm.amdgcn.workitem.id.z", "amdgpu-work-item-id-z" }, 113 114 { "llvm.amdgcn.workgroup.id.y", "amdgpu-work-group-id-y" }, 115 { "llvm.amdgcn.workgroup.id.z", "amdgpu-work-group-id-z" }, 116 117 { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, 118 { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, 119 120 // .x omitted 121 { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, 122 { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } 123 }; 124 125 static const StringRef HSAIntrinsicToAttr[][2] = { 126 { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, 127 { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" } 128 }; 129 130 // TODO: We should not add the attributes if the known compile time workgroup 131 // size is 1 for y/z. 132 133 // TODO: Intrinsics that require queue ptr. 134 135 // We do not need to note the x workitem or workgroup id because they are 136 // always initialized. 137 138 bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); 139 if (TT.getOS() == Triple::AMDHSA) { 140 Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); 141 142 for (Function &F : M) { 143 if (F.hasFnAttribute("amdgpu-queue-ptr")) 144 continue; 145 146 if (hasAddrSpaceCast(F)) 147 F.addFnAttr("amdgpu-queue-ptr"); 148 } 149 } 150 151 return Changed; 152 } 153 154 ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 155 return new AMDGPUAnnotateKernelFeatures(); 156 } 157