//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// that will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  AMDGPUAS AS;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
                                 const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
                                                     AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
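    // Constant operands may themselves be (or contain) constant expressions,
    // so queue any constant operand to be checked on a later iteration.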
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // X ids unnecessarily propagated to kernels.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
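        // A call without a known callee (other than inline asm) does not get
        // any attributes propagated here; it only counts as a call for the
        // flat scratch estimate below.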
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  AS = AMDGPU::getAMDGPUAS(CG.getModule());
  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}