//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

/// CallGraphSCC pass that tags functions with "amdgpu-*" string attributes
/// recording which implicit inputs they need (work-item/work-group IDs,
/// queue pointer, dispatch pointer/id, kernarg segment pointer, ...).
/// Running as a CallGraphSCCPass visits callees before callers, so
/// attributes discovered on a callee can be propagated to each caller.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Both members are filled in by doInitialization (TM from the
  // TargetPassConfig analysis, AS from the module's address-space map).
  const TargetMachine *TM = nullptr;
  AMDGPUAS AS;

  // Annotates a single function; returns true if any attribute was added.
  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Purely additive annotation; no analyses are invalidated.
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  // Returns true if CE is an addrspacecast that requires the queue pointer
  // (source address space is local or private).
  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  // Walks EntryC and all constant sub-operands (deduplicated through
  // ConstantExprVisited) looking for such a cast.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

}

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
721a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { 731a14bfa0SYaxun Liu return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; 7499c14524SMatt Arsenault } 7599c14524SMatt Arsenault 761a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, 771a14bfa0SYaxun Liu const AMDGPUAS &AS) { 781a14bfa0SYaxun Liu return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); 793b2e2a59SMatt Arsenault } 803b2e2a59SMatt Arsenault 811a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, 821a14bfa0SYaxun Liu AMDGPUAS AS) { 833b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 843b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 851a14bfa0SYaxun Liu return castRequiresQueuePtr(SrcAS, AS); 863b2e2a59SMatt Arsenault } 873b2e2a59SMatt Arsenault 883b2e2a59SMatt Arsenault return false; 893b2e2a59SMatt Arsenault } 903b2e2a59SMatt Arsenault 913b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 923b2e2a59SMatt Arsenault const Constant *EntryC, 931a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 941a14bfa0SYaxun Liu AMDGPUAS AS) { 953b2e2a59SMatt Arsenault 963b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 973b2e2a59SMatt Arsenault return false; 983b2e2a59SMatt Arsenault 993b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 1003b2e2a59SMatt Arsenault Stack.push_back(EntryC); 1013b2e2a59SMatt Arsenault 1023b2e2a59SMatt Arsenault while (!Stack.empty()) { 1033b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 1043b2e2a59SMatt Arsenault 1053b2e2a59SMatt Arsenault // Check this constant expression. 
1063b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 1071a14bfa0SYaxun Liu if (visitConstantExpr(CE, AS)) 1083b2e2a59SMatt Arsenault return true; 1093b2e2a59SMatt Arsenault } 1103b2e2a59SMatt Arsenault 1113b2e2a59SMatt Arsenault // Visit all sub-expressions. 1123b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 1133b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1143b2e2a59SMatt Arsenault if (!OpC) 1153b2e2a59SMatt Arsenault continue; 1163b2e2a59SMatt Arsenault 1173b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 1183b2e2a59SMatt Arsenault continue; 1193b2e2a59SMatt Arsenault 1203b2e2a59SMatt Arsenault Stack.push_back(OpC); 1213b2e2a59SMatt Arsenault } 1223b2e2a59SMatt Arsenault } 1233b2e2a59SMatt Arsenault 1243b2e2a59SMatt Arsenault return false; 1253b2e2a59SMatt Arsenault } 1263b2e2a59SMatt Arsenault 1276b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always 1286b93046fSMatt Arsenault // initialized. 1296b93046fSMatt Arsenault // 1306b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 1316b93046fSMatt Arsenault // size is 1 for y/z. 
// Maps an intrinsic ID to the "amdgpu-*" attribute it implies, or "" if it
// implies none. NonKernelOnly is set for the x ids, which are always
// initialized for kernels and so only need annotating on non-kernel
// functions. IsQueuePtr is set when the intrinsic needs the queue pointer
// in addition to its attribute.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    // trap/debugtrap are grouped with amdgcn_queue_ptr here — presumably
    // their lowering reads the queue ptr; confirm against the trap lowering.
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

// If Callee carries attribute Name, copy it onto Parent. Returns true iff
// the attribute was present on the callee (and therefore added).
static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

// Propagates every feature attribute a callee needs up to its caller.
// The queue ptr is reported through NeedQueuePtr rather than copied
// directly, so the caller can merge it with queue-ptr needs discovered
// from its own instructions before adding the attribute once.
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // X ids unnecessarily propagated to kernels.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

bool
AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 206254ad3deSMatt Arsenault const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F); 207254ad3deSMatt Arsenault bool HasFlat = ST.hasFlatAddressSpace(); 208254ad3deSMatt Arsenault bool HasApertureRegs = ST.hasApertureRegs(); 2093b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 2103b2e2a59SMatt Arsenault 2116b93046fSMatt Arsenault bool Changed = false; 2126b93046fSMatt Arsenault bool NeedQueuePtr = false; 213254ad3deSMatt Arsenault bool HaveCall = false; 214e15855d9SMatt Arsenault bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 2156b93046fSMatt Arsenault 2166b93046fSMatt Arsenault for (BasicBlock &BB : F) { 2176b93046fSMatt Arsenault for (Instruction &I : BB) { 2186b93046fSMatt Arsenault CallSite CS(&I); 2196b93046fSMatt Arsenault if (CS) { 2206b93046fSMatt Arsenault Function *Callee = CS.getCalledFunction(); 2216b93046fSMatt Arsenault 2226b93046fSMatt Arsenault // TODO: Do something with indirect calls. 
223254ad3deSMatt Arsenault if (!Callee) { 224254ad3deSMatt Arsenault if (!CS.isInlineAsm()) 225254ad3deSMatt Arsenault HaveCall = true; 2266b93046fSMatt Arsenault continue; 227254ad3deSMatt Arsenault } 2286b93046fSMatt Arsenault 2296b93046fSMatt Arsenault Intrinsic::ID IID = Callee->getIntrinsicID(); 2306b93046fSMatt Arsenault if (IID == Intrinsic::not_intrinsic) { 231254ad3deSMatt Arsenault HaveCall = true; 2326b93046fSMatt Arsenault copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 2336b93046fSMatt Arsenault Changed = true; 2346b93046fSMatt Arsenault } else { 235e15855d9SMatt Arsenault bool NonKernelOnly = false; 236e15855d9SMatt Arsenault StringRef AttrName = intrinsicToAttrName(IID, 237e15855d9SMatt Arsenault NonKernelOnly, NeedQueuePtr); 238e15855d9SMatt Arsenault if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { 2396b93046fSMatt Arsenault F.addFnAttr(AttrName); 2406b93046fSMatt Arsenault Changed = true; 2416b93046fSMatt Arsenault } 2426b93046fSMatt Arsenault } 2436b93046fSMatt Arsenault } 2446b93046fSMatt Arsenault 2456b93046fSMatt Arsenault if (NeedQueuePtr || HasApertureRegs) 2466b93046fSMatt Arsenault continue; 2476b93046fSMatt Arsenault 24899c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 2496b93046fSMatt Arsenault if (castRequiresQueuePtr(ASC, AS)) { 2506b93046fSMatt Arsenault NeedQueuePtr = true; 2516b93046fSMatt Arsenault continue; 2526b93046fSMatt Arsenault } 25399c14524SMatt Arsenault } 2543b2e2a59SMatt Arsenault 2553b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 2563b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 2573b2e2a59SMatt Arsenault if (!OpC) 2583b2e2a59SMatt Arsenault continue; 2593b2e2a59SMatt Arsenault 2606b93046fSMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { 2616b93046fSMatt Arsenault NeedQueuePtr = true; 2626b93046fSMatt Arsenault break; 2636b93046fSMatt Arsenault } 2643b2e2a59SMatt Arsenault } 26599c14524SMatt Arsenault } 
26699c14524SMatt Arsenault } 26799c14524SMatt Arsenault 2686b93046fSMatt Arsenault if (NeedQueuePtr) { 2696b93046fSMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 2706b93046fSMatt Arsenault Changed = true; 27199c14524SMatt Arsenault } 2723931948bSMatt Arsenault 273254ad3deSMatt Arsenault // TODO: We could refine this to captured pointers that could possibly be 274254ad3deSMatt Arsenault // accessed by flat instructions. For now this is mostly a poor way of 275254ad3deSMatt Arsenault // estimating whether there are calls before argument lowering. 276254ad3deSMatt Arsenault if (HasFlat && !IsFunc && HaveCall) { 277254ad3deSMatt Arsenault F.addFnAttr("amdgpu-flat-scratch"); 278254ad3deSMatt Arsenault Changed = true; 279254ad3deSMatt Arsenault } 280254ad3deSMatt Arsenault 2816b93046fSMatt Arsenault return Changed; 2826b93046fSMatt Arsenault } 2836b93046fSMatt Arsenault 2846b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 2856b93046fSMatt Arsenault Module &M = SCC.getCallGraph().getModule(); 2863931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 2873931948bSMatt Arsenault 2886b93046fSMatt Arsenault bool Changed = false; 2896b93046fSMatt Arsenault for (CallGraphNode *I : SCC) { 2906b93046fSMatt Arsenault Function *F = I->getFunction(); 2916b93046fSMatt Arsenault if (!F || F->isDeclaration()) 29299c14524SMatt Arsenault continue; 29399c14524SMatt Arsenault 2946b93046fSMatt Arsenault Changed |= addFeatureAttributes(*F); 29599c14524SMatt Arsenault } 2966b93046fSMatt Arsenault 29799c14524SMatt Arsenault 2983931948bSMatt Arsenault return Changed; 2993931948bSMatt Arsenault } 3003931948bSMatt Arsenault 3016b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 3026b93046fSMatt Arsenault auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 3036b93046fSMatt Arsenault if (!TPC) 3046b93046fSMatt Arsenault report_fatal_error("TargetMachine is required"); 3056b93046fSMatt Arsenault 3066b93046fSMatt 
Arsenault AS = AMDGPU::getAMDGPUAS(CG.getModule()); 3076b93046fSMatt Arsenault TM = &TPC->getTM<TargetMachine>(); 3086b93046fSMatt Arsenault return false; 3096b93046fSMatt Arsenault } 3106b93046fSMatt Arsenault 3116b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 3128b61764cSFrancis Visoiu Mistrih return new AMDGPUAnnotateKernelFeatures(); 3133931948bSMatt Arsenault } 314