1*fa6434beSEugene Zelenko //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===// 23931948bSMatt Arsenault // 33931948bSMatt Arsenault // The LLVM Compiler Infrastructure 43931948bSMatt Arsenault // 53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source 63931948bSMatt Arsenault // License. See LICENSE.TXT for details. 73931948bSMatt Arsenault // 83931948bSMatt Arsenault //===----------------------------------------------------------------------===// 93931948bSMatt Arsenault // 103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics 113931948bSMatt Arsenault /// which will impact calling convention lowering. 123931948bSMatt Arsenault // 133931948bSMatt Arsenault //===----------------------------------------------------------------------===// 143931948bSMatt Arsenault 153931948bSMatt Arsenault #include "AMDGPU.h" 16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h" 17*fa6434beSEugene Zelenko #include "Utils/AMDGPUBaseInfo.h" 18*fa6434beSEugene Zelenko #include "llvm/ADT/SmallPtrSet.h" 19*fa6434beSEugene Zelenko #include "llvm/ADT/SmallVector.h" 20*fa6434beSEugene Zelenko #include "llvm/ADT/StringRef.h" 212ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h" 22*fa6434beSEugene Zelenko #include "llvm/Analysis/CallGraph.h" 236b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h" 248b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h" 25*fa6434beSEugene Zelenko #include "llvm/IR/CallSite.h" 26*fa6434beSEugene Zelenko #include "llvm/IR/Constant.h" 273b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h" 28*fa6434beSEugene Zelenko #include "llvm/IR/Function.h" 29*fa6434beSEugene Zelenko #include "llvm/IR/Instruction.h" 303931948bSMatt Arsenault #include "llvm/IR/Instructions.h" 31*fa6434beSEugene Zelenko #include "llvm/IR/Intrinsics.h" 323931948bSMatt Arsenault #include "llvm/IR/Module.h" 33*fa6434beSEugene Zelenko #include "llvm/IR/Type.h" 34*fa6434beSEugene Zelenko #include "llvm/IR/Use.h" 35*fa6434beSEugene Zelenko #include "llvm/Pass.h" 36*fa6434beSEugene Zelenko #include "llvm/Support/Casting.h" 37*fa6434beSEugene Zelenko #include "llvm/Support/ErrorHandling.h" 38*fa6434beSEugene Zelenko #include "llvm/Target/TargetMachine.h" 393931948bSMatt Arsenault 403931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 413931948bSMatt Arsenault 423931948bSMatt Arsenault using namespace llvm; 433931948bSMatt Arsenault 443931948bSMatt Arsenault namespace { 453931948bSMatt Arsenault 466b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 473931948bSMatt Arsenault private: 486b93046fSMatt Arsenault const TargetMachine *TM = nullptr; 491a14bfa0SYaxun Liu AMDGPUAS AS; 5099c14524SMatt Arsenault 516b93046fSMatt Arsenault bool addFeatureAttributes(Function &F); 526b93046fSMatt Arsenault 533931948bSMatt Arsenault public: 543931948bSMatt Arsenault static char ID; 553931948bSMatt Arsenault 566b93046fSMatt Arsenault AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 576b93046fSMatt Arsenault 586b93046fSMatt Arsenault bool doInitialization(CallGraph &CG) override; 596b93046fSMatt Arsenault bool runOnSCC(CallGraphSCC &SCC) override; 60*fa6434beSEugene Zelenko 61117296c0SMehdi Amini StringRef getPassName() const override { 623931948bSMatt Arsenault return "AMDGPU Annotate Kernel Features"; 633931948bSMatt Arsenault } 643931948bSMatt Arsenault 653931948bSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 663931948bSMatt Arsenault AU.setPreservesAll(); 676b93046fSMatt Arsenault CallGraphSCCPass::getAnalysisUsage(AU); 683931948bSMatt Arsenault } 693b2e2a59SMatt Arsenault 701a14bfa0SYaxun Liu static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); 713b2e2a59SMatt Arsenault static bool visitConstantExprsRecursively( 723b2e2a59SMatt Arsenault const Constant *EntryC, 731a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 741a14bfa0SYaxun Liu AMDGPUAS AS); 753931948bSMatt Arsenault }; 763931948bSMatt Arsenault 77*fa6434beSEugene Zelenko } // end anonymous namespace 783931948bSMatt Arsenault 793931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0; 803931948bSMatt Arsenault 813931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 823931948bSMatt Arsenault 8399c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 843931948bSMatt Arsenault "Add AMDGPU function attributes", false, false) 853931948bSMatt Arsenault 8699c14524SMatt Arsenault 8799c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it. 881a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { 891a14bfa0SYaxun Liu return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; 9099c14524SMatt Arsenault } 9199c14524SMatt Arsenault 921a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, 931a14bfa0SYaxun Liu const AMDGPUAS &AS) { 941a14bfa0SYaxun Liu return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); 953b2e2a59SMatt Arsenault } 963b2e2a59SMatt Arsenault 971a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, 981a14bfa0SYaxun Liu AMDGPUAS AS) { 993b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 1003b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 1011a14bfa0SYaxun Liu return castRequiresQueuePtr(SrcAS, AS); 1023b2e2a59SMatt Arsenault } 1033b2e2a59SMatt Arsenault 1043b2e2a59SMatt Arsenault return false; 1053b2e2a59SMatt Arsenault } 1063b2e2a59SMatt Arsenault 1073b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 1083b2e2a59SMatt Arsenault const Constant *EntryC, 1091a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 1101a14bfa0SYaxun Liu AMDGPUAS AS) { 1113b2e2a59SMatt Arsenault 1123b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 1133b2e2a59SMatt Arsenault return false; 1143b2e2a59SMatt Arsenault 1153b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 1163b2e2a59SMatt Arsenault Stack.push_back(EntryC); 1173b2e2a59SMatt Arsenault 1183b2e2a59SMatt Arsenault while (!Stack.empty()) { 1193b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 1203b2e2a59SMatt Arsenault 1213b2e2a59SMatt Arsenault // Check this constant expression. 1223b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 1231a14bfa0SYaxun Liu if (visitConstantExpr(CE, AS)) 1243b2e2a59SMatt Arsenault return true; 1253b2e2a59SMatt Arsenault } 1263b2e2a59SMatt Arsenault 1273b2e2a59SMatt Arsenault // Visit all sub-expressions. 1283b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 1293b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1303b2e2a59SMatt Arsenault if (!OpC) 1313b2e2a59SMatt Arsenault continue; 1323b2e2a59SMatt Arsenault 1333b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 1343b2e2a59SMatt Arsenault continue; 1353b2e2a59SMatt Arsenault 1363b2e2a59SMatt Arsenault Stack.push_back(OpC); 1373b2e2a59SMatt Arsenault } 1383b2e2a59SMatt Arsenault } 1393b2e2a59SMatt Arsenault 1403b2e2a59SMatt Arsenault return false; 1413b2e2a59SMatt Arsenault } 1423b2e2a59SMatt Arsenault 1436b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always 1446b93046fSMatt Arsenault // initialized. 1456b93046fSMatt Arsenault // 1466b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 1476b93046fSMatt Arsenault // size is 1 for y/z. 148e15855d9SMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID, 149e15855d9SMatt Arsenault bool &NonKernelOnly, 150e15855d9SMatt Arsenault bool &IsQueuePtr) { 1516b93046fSMatt Arsenault switch (ID) { 152e15855d9SMatt Arsenault case Intrinsic::amdgcn_workitem_id_x: 153e15855d9SMatt Arsenault NonKernelOnly = true; 154e15855d9SMatt Arsenault return "amdgpu-work-item-id-x"; 155e15855d9SMatt Arsenault case Intrinsic::amdgcn_workgroup_id_x: 156e15855d9SMatt Arsenault NonKernelOnly = true; 157e15855d9SMatt Arsenault return "amdgpu-work-group-id-x"; 1586b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_y: 1596b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_y: 1606b93046fSMatt Arsenault return "amdgpu-work-item-id-y"; 1616b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_z: 1626b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_z: 1636b93046fSMatt Arsenault return "amdgpu-work-item-id-z"; 1646b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_y: 1656b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_y: 1666b93046fSMatt Arsenault return "amdgpu-work-group-id-y"; 1676b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_z: 1686b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_z: 1696b93046fSMatt Arsenault return "amdgpu-work-group-id-z"; 1706b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_ptr: 1716b93046fSMatt Arsenault return "amdgpu-dispatch-ptr"; 1726b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_id: 1736b93046fSMatt Arsenault return "amdgpu-dispatch-id"; 17423e4df6aSMatt Arsenault case Intrinsic::amdgcn_kernarg_segment_ptr: 17523e4df6aSMatt Arsenault return "amdgpu-kernarg-segment-ptr"; 1769166ce86SMatt Arsenault case Intrinsic::amdgcn_implicitarg_ptr: 1779166ce86SMatt Arsenault return "amdgpu-implicitarg-ptr"; 1786b93046fSMatt Arsenault case Intrinsic::amdgcn_queue_ptr: 1796b93046fSMatt Arsenault case Intrinsic::trap: 1806b93046fSMatt Arsenault case Intrinsic::debugtrap: 1816b93046fSMatt Arsenault IsQueuePtr = true; 1826b93046fSMatt Arsenault return "amdgpu-queue-ptr"; 1836b93046fSMatt Arsenault default: 1846b93046fSMatt Arsenault return ""; 1856b93046fSMatt Arsenault } 1866b93046fSMatt Arsenault } 1876b93046fSMatt Arsenault 1886b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee, 1896b93046fSMatt Arsenault StringRef Name) { 1906b93046fSMatt Arsenault if (Callee.hasFnAttribute(Name)) { 1916b93046fSMatt Arsenault Parent.addFnAttr(Name); 1926b93046fSMatt Arsenault return true; 1936b93046fSMatt Arsenault } 1946b93046fSMatt Arsenault 1956b93046fSMatt Arsenault return false; 1966b93046fSMatt Arsenault } 1976b93046fSMatt Arsenault 1986b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee, 1996b93046fSMatt Arsenault bool &NeedQueuePtr) { 200e15855d9SMatt Arsenault // X ids unnecessarily propagated to kernels. 2016b93046fSMatt Arsenault static const StringRef AttrNames[] = { 202e15855d9SMatt Arsenault { "amdgpu-work-item-id-x" }, 2036b93046fSMatt Arsenault { "amdgpu-work-item-id-y" }, 2046b93046fSMatt Arsenault { "amdgpu-work-item-id-z" }, 205e15855d9SMatt Arsenault { "amdgpu-work-group-id-x" }, 2066b93046fSMatt Arsenault { "amdgpu-work-group-id-y" }, 2076b93046fSMatt Arsenault { "amdgpu-work-group-id-z" }, 2086b93046fSMatt Arsenault { "amdgpu-dispatch-ptr" }, 20923e4df6aSMatt Arsenault { "amdgpu-dispatch-id" }, 2109166ce86SMatt Arsenault { "amdgpu-kernarg-segment-ptr" }, 2119166ce86SMatt Arsenault { "amdgpu-implicitarg-ptr" } 2126b93046fSMatt Arsenault }; 2136b93046fSMatt Arsenault 2146b93046fSMatt Arsenault if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) 2156b93046fSMatt Arsenault NeedQueuePtr = true; 2166b93046fSMatt Arsenault 2176b93046fSMatt Arsenault for (StringRef AttrName : AttrNames) 2186b93046fSMatt Arsenault handleAttr(Parent, Callee, AttrName); 2196b93046fSMatt Arsenault } 2206b93046fSMatt Arsenault 2216b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 222254ad3deSMatt Arsenault const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); 223254ad3deSMatt Arsenault bool HasFlat = ST.hasFlatAddressSpace(); 224254ad3deSMatt Arsenault bool HasApertureRegs = ST.hasApertureRegs(); 2253b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 2263b2e2a59SMatt Arsenault 2276b93046fSMatt Arsenault bool Changed = false; 2286b93046fSMatt Arsenault bool NeedQueuePtr = false; 229254ad3deSMatt Arsenault bool HaveCall = false; 230e15855d9SMatt Arsenault bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 2316b93046fSMatt Arsenault 2326b93046fSMatt Arsenault for (BasicBlock &BB : F) { 2336b93046fSMatt Arsenault for (Instruction &I : BB) { 2346b93046fSMatt Arsenault CallSite CS(&I); 2356b93046fSMatt Arsenault if (CS) { 2366b93046fSMatt Arsenault Function *Callee = CS.getCalledFunction(); 2376b93046fSMatt Arsenault 2386b93046fSMatt Arsenault // TODO: Do something with indirect calls. 239254ad3deSMatt Arsenault if (!Callee) { 240254ad3deSMatt Arsenault if (!CS.isInlineAsm()) 241254ad3deSMatt Arsenault HaveCall = true; 2426b93046fSMatt Arsenault continue; 243254ad3deSMatt Arsenault } 2446b93046fSMatt Arsenault 2456b93046fSMatt Arsenault Intrinsic::ID IID = Callee->getIntrinsicID(); 2466b93046fSMatt Arsenault if (IID == Intrinsic::not_intrinsic) { 247254ad3deSMatt Arsenault HaveCall = true; 2486b93046fSMatt Arsenault copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 2496b93046fSMatt Arsenault Changed = true; 2506b93046fSMatt Arsenault } else { 251e15855d9SMatt Arsenault bool NonKernelOnly = false; 252e15855d9SMatt Arsenault StringRef AttrName = intrinsicToAttrName(IID, 253e15855d9SMatt Arsenault NonKernelOnly, NeedQueuePtr); 254e15855d9SMatt Arsenault if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { 2556b93046fSMatt Arsenault F.addFnAttr(AttrName); 2566b93046fSMatt Arsenault Changed = true; 2576b93046fSMatt Arsenault } 2586b93046fSMatt Arsenault } 2596b93046fSMatt Arsenault } 2606b93046fSMatt Arsenault 2616b93046fSMatt Arsenault if (NeedQueuePtr || HasApertureRegs) 2626b93046fSMatt Arsenault continue; 2636b93046fSMatt Arsenault 26499c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 2656b93046fSMatt Arsenault if (castRequiresQueuePtr(ASC, AS)) { 2666b93046fSMatt Arsenault NeedQueuePtr = true; 2676b93046fSMatt Arsenault continue; 2686b93046fSMatt Arsenault } 26999c14524SMatt Arsenault } 2703b2e2a59SMatt Arsenault 2713b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 2723b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 2733b2e2a59SMatt Arsenault if (!OpC) 2743b2e2a59SMatt Arsenault continue; 2753b2e2a59SMatt Arsenault 2766b93046fSMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { 2776b93046fSMatt Arsenault NeedQueuePtr = true; 2786b93046fSMatt Arsenault break; 2796b93046fSMatt Arsenault } 2803b2e2a59SMatt Arsenault } 28199c14524SMatt Arsenault } 28299c14524SMatt Arsenault } 28399c14524SMatt Arsenault 2846b93046fSMatt Arsenault if (NeedQueuePtr) { 2856b93046fSMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 2866b93046fSMatt Arsenault Changed = true; 28799c14524SMatt Arsenault } 2883931948bSMatt Arsenault 289254ad3deSMatt Arsenault // TODO: We could refine this to captured pointers that could possibly be 290254ad3deSMatt Arsenault // accessed by flat instructions. For now this is mostly a poor way of 291254ad3deSMatt Arsenault // estimating whether there are calls before argument lowering. 292254ad3deSMatt Arsenault if (HasFlat && !IsFunc && HaveCall) { 293254ad3deSMatt Arsenault F.addFnAttr("amdgpu-flat-scratch"); 294254ad3deSMatt Arsenault Changed = true; 295254ad3deSMatt Arsenault } 296254ad3deSMatt Arsenault 2976b93046fSMatt Arsenault return Changed; 2986b93046fSMatt Arsenault } 2996b93046fSMatt Arsenault 3006b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 3016b93046fSMatt Arsenault Module &M = SCC.getCallGraph().getModule(); 3023931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 3033931948bSMatt Arsenault 3046b93046fSMatt Arsenault bool Changed = false; 3056b93046fSMatt Arsenault for (CallGraphNode *I : SCC) { 3066b93046fSMatt Arsenault Function *F = I->getFunction(); 3076b93046fSMatt Arsenault if (!F || F->isDeclaration()) 30899c14524SMatt Arsenault continue; 30999c14524SMatt Arsenault 3106b93046fSMatt Arsenault Changed |= addFeatureAttributes(*F); 31199c14524SMatt Arsenault } 3126b93046fSMatt Arsenault 3133931948bSMatt Arsenault return Changed; 3143931948bSMatt Arsenault } 3153931948bSMatt Arsenault 3166b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 3176b93046fSMatt Arsenault auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 3186b93046fSMatt Arsenault if (!TPC) 3196b93046fSMatt Arsenault report_fatal_error("TargetMachine is required"); 3206b93046fSMatt Arsenault 3216b93046fSMatt Arsenault AS = AMDGPU::getAMDGPUAS(CG.getModule()); 3226b93046fSMatt Arsenault TM = &TPC->getTM<TargetMachine>(); 3236b93046fSMatt Arsenault return false; 3246b93046fSMatt Arsenault } 3256b93046fSMatt Arsenault 3266b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 3278b61764cSFrancis Visoiu Mistrih return new AMDGPUAnnotateKernelFeatures(); 3283931948bSMatt Arsenault } 329