13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// 23931948bSMatt Arsenault // 33931948bSMatt Arsenault // The LLVM Compiler Infrastructure 43931948bSMatt Arsenault // 53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source 63931948bSMatt Arsenault // License. See LICENSE.TXT for details. 73931948bSMatt Arsenault // 83931948bSMatt Arsenault //===----------------------------------------------------------------------===// 93931948bSMatt Arsenault // 103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics 113931948bSMatt Arsenault /// which will impact calling convention lowering. 123931948bSMatt Arsenault // 133931948bSMatt Arsenault //===----------------------------------------------------------------------===// 143931948bSMatt Arsenault 153931948bSMatt Arsenault #include "AMDGPU.h" 16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h" 172ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h" 186b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h" 198b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h" 203b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h" 216b93046fSMatt Arsenault #include "llvm/IR/InstIterator.h" 223931948bSMatt Arsenault #include "llvm/IR/Instructions.h" 233931948bSMatt Arsenault #include "llvm/IR/Module.h" 243931948bSMatt Arsenault 253931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 263931948bSMatt Arsenault 273931948bSMatt Arsenault using namespace llvm; 283931948bSMatt Arsenault 293931948bSMatt Arsenault namespace { 303931948bSMatt Arsenault 316b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 323931948bSMatt Arsenault private: 336b93046fSMatt Arsenault const TargetMachine *TM = nullptr; 341a14bfa0SYaxun Liu AMDGPUAS AS; 3599c14524SMatt Arsenault 366b93046fSMatt Arsenault bool addFeatureAttributes(Function &F); 376b93046fSMatt Arsenault 386b93046fSMatt Arsenault void addAttrToCallers(Function &Intrin, StringRef AttrName); 393931948bSMatt Arsenault bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); 403931948bSMatt Arsenault 413931948bSMatt Arsenault public: 423931948bSMatt Arsenault static char ID; 433931948bSMatt Arsenault 446b93046fSMatt Arsenault AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 456b93046fSMatt Arsenault 466b93046fSMatt Arsenault bool doInitialization(CallGraph &CG) override; 476b93046fSMatt Arsenault bool runOnSCC(CallGraphSCC &SCC) override; 48117296c0SMehdi Amini StringRef getPassName() const override { 493931948bSMatt Arsenault return "AMDGPU Annotate Kernel Features"; 503931948bSMatt Arsenault } 513931948bSMatt Arsenault 523931948bSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 533931948bSMatt Arsenault AU.setPreservesAll(); 546b93046fSMatt Arsenault CallGraphSCCPass::getAnalysisUsage(AU); 553931948bSMatt Arsenault } 563b2e2a59SMatt Arsenault 571a14bfa0SYaxun Liu static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); 583b2e2a59SMatt Arsenault static bool visitConstantExprsRecursively( 593b2e2a59SMatt Arsenault const Constant *EntryC, 601a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 611a14bfa0SYaxun Liu AMDGPUAS AS); 623931948bSMatt Arsenault }; 633931948bSMatt Arsenault 643931948bSMatt Arsenault } 653931948bSMatt Arsenault 663931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0; 673931948bSMatt Arsenault 683931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 693931948bSMatt Arsenault 7099c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 713931948bSMatt Arsenault "Add AMDGPU function attributes", false, false) 723931948bSMatt Arsenault 7399c14524SMatt Arsenault 7499c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it. 751a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { 761a14bfa0SYaxun Liu return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; 7799c14524SMatt Arsenault } 7899c14524SMatt Arsenault 791a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, 801a14bfa0SYaxun Liu const AMDGPUAS &AS) { 811a14bfa0SYaxun Liu return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); 823b2e2a59SMatt Arsenault } 833b2e2a59SMatt Arsenault 841a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, 851a14bfa0SYaxun Liu AMDGPUAS AS) { 863b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 873b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 881a14bfa0SYaxun Liu return castRequiresQueuePtr(SrcAS, AS); 893b2e2a59SMatt Arsenault } 903b2e2a59SMatt Arsenault 913b2e2a59SMatt Arsenault return false; 923b2e2a59SMatt Arsenault } 933b2e2a59SMatt Arsenault 943b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 953b2e2a59SMatt Arsenault const Constant *EntryC, 961a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 971a14bfa0SYaxun Liu AMDGPUAS AS) { 983b2e2a59SMatt Arsenault 993b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 1003b2e2a59SMatt Arsenault return false; 1013b2e2a59SMatt Arsenault 1023b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 1033b2e2a59SMatt Arsenault Stack.push_back(EntryC); 1043b2e2a59SMatt Arsenault 1053b2e2a59SMatt Arsenault while (!Stack.empty()) { 1063b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 1073b2e2a59SMatt Arsenault 1083b2e2a59SMatt Arsenault // Check this constant expression. 1093b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 1101a14bfa0SYaxun Liu if (visitConstantExpr(CE, AS)) 1113b2e2a59SMatt Arsenault return true; 1123b2e2a59SMatt Arsenault } 1133b2e2a59SMatt Arsenault 1143b2e2a59SMatt Arsenault // Visit all sub-expressions. 1153b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 1163b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1173b2e2a59SMatt Arsenault if (!OpC) 1183b2e2a59SMatt Arsenault continue; 1193b2e2a59SMatt Arsenault 1203b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 1213b2e2a59SMatt Arsenault continue; 1223b2e2a59SMatt Arsenault 1233b2e2a59SMatt Arsenault Stack.push_back(OpC); 1243b2e2a59SMatt Arsenault } 1253b2e2a59SMatt Arsenault } 1263b2e2a59SMatt Arsenault 1273b2e2a59SMatt Arsenault return false; 1283b2e2a59SMatt Arsenault } 1293b2e2a59SMatt Arsenault 1306b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always 1316b93046fSMatt Arsenault // initialized. 1326b93046fSMatt Arsenault // 1336b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 1346b93046fSMatt Arsenault // size is 1 for y/z. 1356b93046fSMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) { 1366b93046fSMatt Arsenault switch (ID) { 1376b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_y: 1386b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_y: 1396b93046fSMatt Arsenault return "amdgpu-work-item-id-y"; 1406b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_z: 1416b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_z: 1426b93046fSMatt Arsenault return "amdgpu-work-item-id-z"; 1436b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_y: 1446b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_y: 1456b93046fSMatt Arsenault return "amdgpu-work-group-id-y"; 1466b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_z: 1476b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_z: 1486b93046fSMatt Arsenault return "amdgpu-work-group-id-z"; 1496b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_ptr: 1506b93046fSMatt Arsenault return "amdgpu-dispatch-ptr"; 1516b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_id: 1526b93046fSMatt Arsenault return "amdgpu-dispatch-id"; 153*23e4df6aSMatt Arsenault case Intrinsic::amdgcn_kernarg_segment_ptr: 154*23e4df6aSMatt Arsenault case Intrinsic::amdgcn_implicitarg_ptr: 155*23e4df6aSMatt Arsenault return "amdgpu-kernarg-segment-ptr"; 1566b93046fSMatt Arsenault case Intrinsic::amdgcn_queue_ptr: 1576b93046fSMatt Arsenault case Intrinsic::trap: 1586b93046fSMatt Arsenault case Intrinsic::debugtrap: 1596b93046fSMatt Arsenault IsQueuePtr = true; 1606b93046fSMatt Arsenault return "amdgpu-queue-ptr"; 1616b93046fSMatt Arsenault default: 1626b93046fSMatt Arsenault return ""; 1636b93046fSMatt Arsenault } 1646b93046fSMatt Arsenault } 1656b93046fSMatt Arsenault 1666b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee, 1676b93046fSMatt Arsenault StringRef Name) { 1686b93046fSMatt Arsenault if (Callee.hasFnAttribute(Name)) { 1696b93046fSMatt Arsenault Parent.addFnAttr(Name); 1706b93046fSMatt Arsenault return true; 1716b93046fSMatt Arsenault } 1726b93046fSMatt Arsenault 1736b93046fSMatt Arsenault return false; 1746b93046fSMatt Arsenault } 1756b93046fSMatt Arsenault 1766b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee, 1776b93046fSMatt Arsenault bool &NeedQueuePtr) { 1786b93046fSMatt Arsenault 1796b93046fSMatt Arsenault static const StringRef AttrNames[] = { 1806b93046fSMatt Arsenault // .x omitted 1816b93046fSMatt Arsenault { "amdgpu-work-item-id-y" }, 1826b93046fSMatt Arsenault { "amdgpu-work-item-id-z" }, 1836b93046fSMatt Arsenault // .x omitted 1846b93046fSMatt Arsenault { "amdgpu-work-group-id-y" }, 1856b93046fSMatt Arsenault { "amdgpu-work-group-id-z" }, 1866b93046fSMatt Arsenault { "amdgpu-dispatch-ptr" }, 187*23e4df6aSMatt Arsenault { "amdgpu-dispatch-id" }, 188*23e4df6aSMatt Arsenault { "amdgpu-kernarg-segment-ptr" } 1896b93046fSMatt Arsenault }; 1906b93046fSMatt Arsenault 1916b93046fSMatt Arsenault if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) 1926b93046fSMatt Arsenault NeedQueuePtr = true; 1936b93046fSMatt Arsenault 1946b93046fSMatt Arsenault for (StringRef AttrName : AttrNames) 1956b93046fSMatt Arsenault handleAttr(Parent, Callee, AttrName); 1966b93046fSMatt Arsenault } 1976b93046fSMatt Arsenault 1986b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 1996b93046fSMatt Arsenault bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs(); 2003b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 2013b2e2a59SMatt Arsenault 2026b93046fSMatt Arsenault bool Changed = false; 2036b93046fSMatt Arsenault bool NeedQueuePtr = false; 2046b93046fSMatt Arsenault 2056b93046fSMatt Arsenault for (BasicBlock &BB : F) { 2066b93046fSMatt Arsenault for (Instruction &I : BB) { 2076b93046fSMatt Arsenault CallSite CS(&I); 2086b93046fSMatt Arsenault if (CS) { 2096b93046fSMatt Arsenault Function *Callee = CS.getCalledFunction(); 2106b93046fSMatt Arsenault 2116b93046fSMatt Arsenault // TODO: Do something with indirect calls. 2126b93046fSMatt Arsenault if (!Callee) 2136b93046fSMatt Arsenault continue; 2146b93046fSMatt Arsenault 2156b93046fSMatt Arsenault Intrinsic::ID IID = Callee->getIntrinsicID(); 2166b93046fSMatt Arsenault if (IID == Intrinsic::not_intrinsic) { 2176b93046fSMatt Arsenault copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 2186b93046fSMatt Arsenault Changed = true; 2196b93046fSMatt Arsenault } else { 2206b93046fSMatt Arsenault StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr); 2216b93046fSMatt Arsenault if (!AttrName.empty()) { 2226b93046fSMatt Arsenault F.addFnAttr(AttrName); 2236b93046fSMatt Arsenault Changed = true; 2246b93046fSMatt Arsenault } 2256b93046fSMatt Arsenault } 2266b93046fSMatt Arsenault } 2276b93046fSMatt Arsenault 2286b93046fSMatt Arsenault if (NeedQueuePtr || HasApertureRegs) 2296b93046fSMatt Arsenault continue; 2306b93046fSMatt Arsenault 23199c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 2326b93046fSMatt Arsenault if (castRequiresQueuePtr(ASC, AS)) { 2336b93046fSMatt Arsenault NeedQueuePtr = true; 2346b93046fSMatt Arsenault continue; 2356b93046fSMatt Arsenault } 23699c14524SMatt Arsenault } 2373b2e2a59SMatt Arsenault 2383b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 2393b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 2403b2e2a59SMatt Arsenault if (!OpC) 2413b2e2a59SMatt Arsenault continue; 2423b2e2a59SMatt Arsenault 2436b93046fSMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { 2446b93046fSMatt Arsenault NeedQueuePtr = true; 2456b93046fSMatt Arsenault break; 2466b93046fSMatt Arsenault } 2473b2e2a59SMatt Arsenault } 24899c14524SMatt Arsenault } 24999c14524SMatt Arsenault } 25099c14524SMatt Arsenault 2516b93046fSMatt Arsenault if (NeedQueuePtr) { 2526b93046fSMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 2536b93046fSMatt Arsenault Changed = true; 25499c14524SMatt Arsenault } 2553931948bSMatt Arsenault 2566b93046fSMatt Arsenault return Changed; 2576b93046fSMatt Arsenault } 2586b93046fSMatt Arsenault 2596b93046fSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin, 2603931948bSMatt Arsenault StringRef AttrName) { 2613931948bSMatt Arsenault SmallPtrSet<Function *, 4> SeenFuncs; 2623931948bSMatt Arsenault 2636b93046fSMatt Arsenault for (User *U : Intrin.users()) { 2643931948bSMatt Arsenault // CallInst is the only valid user for an intrinsic. 2653931948bSMatt Arsenault CallInst *CI = cast<CallInst>(U); 2663931948bSMatt Arsenault 2673931948bSMatt Arsenault Function *CallingFunction = CI->getParent()->getParent(); 2683931948bSMatt Arsenault if (SeenFuncs.insert(CallingFunction).second) 2693931948bSMatt Arsenault CallingFunction->addFnAttr(AttrName); 2703931948bSMatt Arsenault } 2713931948bSMatt Arsenault } 2723931948bSMatt Arsenault 2733931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 2743931948bSMatt Arsenault Module &M, 2753931948bSMatt Arsenault ArrayRef<StringRef[2]> IntrinsicToAttr) { 2763931948bSMatt Arsenault bool Changed = false; 2773931948bSMatt Arsenault 2783931948bSMatt Arsenault for (const StringRef *Arr : IntrinsicToAttr) { 2793931948bSMatt Arsenault if (Function *Fn = M.getFunction(Arr[0])) { 2806b93046fSMatt Arsenault addAttrToCallers(*Fn, Arr[1]); 2813931948bSMatt Arsenault Changed = true; 2823931948bSMatt Arsenault } 2833931948bSMatt Arsenault } 2843931948bSMatt Arsenault 2853931948bSMatt Arsenault return Changed; 2863931948bSMatt Arsenault } 2873931948bSMatt Arsenault 2886b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 2896b93046fSMatt Arsenault Module &M = SCC.getCallGraph().getModule(); 2903931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 2913931948bSMatt Arsenault 2926b93046fSMatt Arsenault bool Changed = false; 2936b93046fSMatt Arsenault for (CallGraphNode *I : SCC) { 2946b93046fSMatt Arsenault Function *F = I->getFunction(); 2956b93046fSMatt Arsenault if (!F || F->isDeclaration()) 29699c14524SMatt Arsenault continue; 29799c14524SMatt Arsenault 2986b93046fSMatt Arsenault Changed |= addFeatureAttributes(*F); 29999c14524SMatt Arsenault } 3006b93046fSMatt Arsenault 30199c14524SMatt Arsenault 3023931948bSMatt Arsenault return Changed; 3033931948bSMatt Arsenault } 3043931948bSMatt Arsenault 3056b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 3066b93046fSMatt Arsenault auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 3076b93046fSMatt Arsenault if (!TPC) 3086b93046fSMatt Arsenault report_fatal_error("TargetMachine is required"); 3096b93046fSMatt Arsenault 3106b93046fSMatt Arsenault AS = AMDGPU::getAMDGPUAS(CG.getModule()); 3116b93046fSMatt Arsenault TM = &TPC->getTM<TargetMachine>(); 3126b93046fSMatt Arsenault return false; 3136b93046fSMatt Arsenault } 3146b93046fSMatt Arsenault 3156b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 3168b61764cSFrancis Visoiu Mistrih return new AMDGPUAnnotateKernelFeatures(); 3173931948bSMatt Arsenault } 318