13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// 23931948bSMatt Arsenault // 33931948bSMatt Arsenault // The LLVM Compiler Infrastructure 43931948bSMatt Arsenault // 53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source 63931948bSMatt Arsenault // License. See LICENSE.TXT for details. 73931948bSMatt Arsenault // 83931948bSMatt Arsenault //===----------------------------------------------------------------------===// 93931948bSMatt Arsenault // 103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics 113931948bSMatt Arsenault /// which will impact calling convention lowering. 123931948bSMatt Arsenault // 133931948bSMatt Arsenault //===----------------------------------------------------------------------===// 143931948bSMatt Arsenault 153931948bSMatt Arsenault #include "AMDGPU.h" 16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h" 172ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h" 18*6b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h" 198b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h" 203b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h" 21*6b93046fSMatt Arsenault #include "llvm/IR/InstIterator.h" 223931948bSMatt Arsenault #include "llvm/IR/Instructions.h" 233931948bSMatt Arsenault #include "llvm/IR/Module.h" 243931948bSMatt Arsenault 253931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 263931948bSMatt Arsenault 273931948bSMatt Arsenault using namespace llvm; 283931948bSMatt Arsenault 293931948bSMatt Arsenault namespace { 303931948bSMatt Arsenault 31*6b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 323931948bSMatt Arsenault private: 33*6b93046fSMatt Arsenault const TargetMachine *TM = nullptr; 341a14bfa0SYaxun Liu AMDGPUAS AS; 3599c14524SMatt Arsenault 36*6b93046fSMatt Arsenault bool addFeatureAttributes(Function &F); 37*6b93046fSMatt Arsenault 38*6b93046fSMatt Arsenault void addAttrToCallers(Function &Intrin, StringRef AttrName); 393931948bSMatt Arsenault bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); 403931948bSMatt Arsenault 413931948bSMatt Arsenault public: 423931948bSMatt Arsenault static char ID; 433931948bSMatt Arsenault 44*6b93046fSMatt Arsenault AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 45*6b93046fSMatt Arsenault 46*6b93046fSMatt Arsenault bool doInitialization(CallGraph &CG) override; 47*6b93046fSMatt Arsenault bool runOnSCC(CallGraphSCC &SCC) override; 48117296c0SMehdi Amini StringRef getPassName() const override { 493931948bSMatt Arsenault return "AMDGPU Annotate Kernel Features"; 503931948bSMatt Arsenault } 513931948bSMatt Arsenault 523931948bSMatt Arsenault void getAnalysisUsage(AnalysisUsage &AU) const override { 533931948bSMatt Arsenault AU.setPreservesAll(); 54*6b93046fSMatt Arsenault CallGraphSCCPass::getAnalysisUsage(AU); 553931948bSMatt Arsenault } 563b2e2a59SMatt Arsenault 571a14bfa0SYaxun Liu static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); 583b2e2a59SMatt Arsenault static bool visitConstantExprsRecursively( 593b2e2a59SMatt Arsenault const Constant *EntryC, 601a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 611a14bfa0SYaxun Liu AMDGPUAS AS); 623931948bSMatt Arsenault }; 633931948bSMatt Arsenault 643931948bSMatt Arsenault } 653931948bSMatt Arsenault 663931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0; 673931948bSMatt Arsenault 683931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 693931948bSMatt Arsenault 7099c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 713931948bSMatt Arsenault "Add AMDGPU function attributes", false, false) 723931948bSMatt Arsenault 7399c14524SMatt Arsenault 7499c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it. 751a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { 761a14bfa0SYaxun Liu return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; 7799c14524SMatt Arsenault } 7899c14524SMatt Arsenault 791a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, 801a14bfa0SYaxun Liu const AMDGPUAS &AS) { 811a14bfa0SYaxun Liu return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); 823b2e2a59SMatt Arsenault } 833b2e2a59SMatt Arsenault 841a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, 851a14bfa0SYaxun Liu AMDGPUAS AS) { 863b2e2a59SMatt Arsenault if (CE->getOpcode() == Instruction::AddrSpaceCast) { 873b2e2a59SMatt Arsenault unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 881a14bfa0SYaxun Liu return castRequiresQueuePtr(SrcAS, AS); 893b2e2a59SMatt Arsenault } 903b2e2a59SMatt Arsenault 913b2e2a59SMatt Arsenault return false; 923b2e2a59SMatt Arsenault } 933b2e2a59SMatt Arsenault 943b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 953b2e2a59SMatt Arsenault const Constant *EntryC, 961a14bfa0SYaxun Liu SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 971a14bfa0SYaxun Liu AMDGPUAS AS) { 983b2e2a59SMatt Arsenault 993b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(EntryC).second) 1003b2e2a59SMatt Arsenault return false; 1013b2e2a59SMatt Arsenault 1023b2e2a59SMatt Arsenault SmallVector<const Constant *, 16> Stack; 1033b2e2a59SMatt Arsenault Stack.push_back(EntryC); 1043b2e2a59SMatt Arsenault 1053b2e2a59SMatt Arsenault while (!Stack.empty()) { 1063b2e2a59SMatt Arsenault const Constant *C = Stack.pop_back_val(); 1073b2e2a59SMatt Arsenault 1083b2e2a59SMatt Arsenault // Check this constant expression. 1093b2e2a59SMatt Arsenault if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 1101a14bfa0SYaxun Liu if (visitConstantExpr(CE, AS)) 1113b2e2a59SMatt Arsenault return true; 1123b2e2a59SMatt Arsenault } 1133b2e2a59SMatt Arsenault 1143b2e2a59SMatt Arsenault // Visit all sub-expressions. 1153b2e2a59SMatt Arsenault for (const Use &U : C->operands()) { 1163b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 1173b2e2a59SMatt Arsenault if (!OpC) 1183b2e2a59SMatt Arsenault continue; 1193b2e2a59SMatt Arsenault 1203b2e2a59SMatt Arsenault if (!ConstantExprVisited.insert(OpC).second) 1213b2e2a59SMatt Arsenault continue; 1223b2e2a59SMatt Arsenault 1233b2e2a59SMatt Arsenault Stack.push_back(OpC); 1243b2e2a59SMatt Arsenault } 1253b2e2a59SMatt Arsenault } 1263b2e2a59SMatt Arsenault 1273b2e2a59SMatt Arsenault return false; 1283b2e2a59SMatt Arsenault } 1293b2e2a59SMatt Arsenault 130*6b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always 131*6b93046fSMatt Arsenault // initialized. 132*6b93046fSMatt Arsenault // 133*6b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup 134*6b93046fSMatt Arsenault // size is 1 for y/z. 135*6b93046fSMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) { 136*6b93046fSMatt Arsenault switch (ID) { 137*6b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_y: 138*6b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_y: 139*6b93046fSMatt Arsenault return "amdgpu-work-item-id-y"; 140*6b93046fSMatt Arsenault case Intrinsic::amdgcn_workitem_id_z: 141*6b93046fSMatt Arsenault case Intrinsic::r600_read_tidig_z: 142*6b93046fSMatt Arsenault return "amdgpu-work-item-id-z"; 143*6b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_y: 144*6b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_y: 145*6b93046fSMatt Arsenault return "amdgpu-work-group-id-y"; 146*6b93046fSMatt Arsenault case Intrinsic::amdgcn_workgroup_id_z: 147*6b93046fSMatt Arsenault case Intrinsic::r600_read_tgid_z: 148*6b93046fSMatt Arsenault return "amdgpu-work-group-id-z"; 149*6b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_ptr: 150*6b93046fSMatt Arsenault return "amdgpu-dispatch-ptr"; 151*6b93046fSMatt Arsenault case Intrinsic::amdgcn_dispatch_id: 152*6b93046fSMatt Arsenault return "amdgpu-dispatch-id"; 153*6b93046fSMatt Arsenault case Intrinsic::amdgcn_queue_ptr: 154*6b93046fSMatt Arsenault case Intrinsic::trap: 155*6b93046fSMatt Arsenault case Intrinsic::debugtrap: 156*6b93046fSMatt Arsenault IsQueuePtr = true; 157*6b93046fSMatt Arsenault return "amdgpu-queue-ptr"; 158*6b93046fSMatt Arsenault default: 159*6b93046fSMatt Arsenault return ""; 160*6b93046fSMatt Arsenault } 161*6b93046fSMatt Arsenault } 162*6b93046fSMatt Arsenault 163*6b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee, 164*6b93046fSMatt Arsenault StringRef Name) { 165*6b93046fSMatt Arsenault if (Callee.hasFnAttribute(Name)) { 166*6b93046fSMatt Arsenault Parent.addFnAttr(Name); 167*6b93046fSMatt Arsenault return true; 168*6b93046fSMatt Arsenault } 169*6b93046fSMatt Arsenault 170*6b93046fSMatt Arsenault return false; 171*6b93046fSMatt Arsenault } 172*6b93046fSMatt Arsenault 173*6b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee, 174*6b93046fSMatt Arsenault bool &NeedQueuePtr) { 175*6b93046fSMatt Arsenault 176*6b93046fSMatt Arsenault static const StringRef AttrNames[] = { 177*6b93046fSMatt Arsenault // .x omitted 178*6b93046fSMatt Arsenault { "amdgpu-work-item-id-y" }, 179*6b93046fSMatt Arsenault { "amdgpu-work-item-id-z" }, 180*6b93046fSMatt Arsenault // .x omitted 181*6b93046fSMatt Arsenault { "amdgpu-work-group-id-y" }, 182*6b93046fSMatt Arsenault { "amdgpu-work-group-id-z" }, 183*6b93046fSMatt Arsenault { "amdgpu-dispatch-ptr" }, 184*6b93046fSMatt Arsenault { "amdgpu-dispatch-id" } 185*6b93046fSMatt Arsenault }; 186*6b93046fSMatt Arsenault 187*6b93046fSMatt Arsenault if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) 188*6b93046fSMatt Arsenault NeedQueuePtr = true; 189*6b93046fSMatt Arsenault 190*6b93046fSMatt Arsenault for (StringRef AttrName : AttrNames) 191*6b93046fSMatt Arsenault handleAttr(Parent, Callee, AttrName); 192*6b93046fSMatt Arsenault } 193*6b93046fSMatt Arsenault 194*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 195*6b93046fSMatt Arsenault bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs(); 1963b2e2a59SMatt Arsenault SmallPtrSet<const Constant *, 8> ConstantExprVisited; 1973b2e2a59SMatt Arsenault 198*6b93046fSMatt Arsenault bool Changed = false; 199*6b93046fSMatt Arsenault bool NeedQueuePtr = false; 200*6b93046fSMatt Arsenault 201*6b93046fSMatt Arsenault for (BasicBlock &BB : F) { 202*6b93046fSMatt Arsenault for (Instruction &I : BB) { 203*6b93046fSMatt Arsenault CallSite CS(&I); 204*6b93046fSMatt Arsenault if (CS) { 205*6b93046fSMatt Arsenault Function *Callee = CS.getCalledFunction(); 206*6b93046fSMatt Arsenault 207*6b93046fSMatt Arsenault // TODO: Do something with indirect calls. 208*6b93046fSMatt Arsenault if (!Callee) 209*6b93046fSMatt Arsenault continue; 210*6b93046fSMatt Arsenault 211*6b93046fSMatt Arsenault Intrinsic::ID IID = Callee->getIntrinsicID(); 212*6b93046fSMatt Arsenault if (IID == Intrinsic::not_intrinsic) { 213*6b93046fSMatt Arsenault copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 214*6b93046fSMatt Arsenault Changed = true; 215*6b93046fSMatt Arsenault } else { 216*6b93046fSMatt Arsenault StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr); 217*6b93046fSMatt Arsenault if (!AttrName.empty()) { 218*6b93046fSMatt Arsenault F.addFnAttr(AttrName); 219*6b93046fSMatt Arsenault Changed = true; 220*6b93046fSMatt Arsenault } 221*6b93046fSMatt Arsenault } 222*6b93046fSMatt Arsenault } 223*6b93046fSMatt Arsenault 224*6b93046fSMatt Arsenault if (NeedQueuePtr || HasApertureRegs) 225*6b93046fSMatt Arsenault continue; 226*6b93046fSMatt Arsenault 22799c14524SMatt Arsenault if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 228*6b93046fSMatt Arsenault if (castRequiresQueuePtr(ASC, AS)) { 229*6b93046fSMatt Arsenault NeedQueuePtr = true; 230*6b93046fSMatt Arsenault continue; 231*6b93046fSMatt Arsenault } 23299c14524SMatt Arsenault } 2333b2e2a59SMatt Arsenault 2343b2e2a59SMatt Arsenault for (const Use &U : I.operands()) { 2353b2e2a59SMatt Arsenault const auto *OpC = dyn_cast<Constant>(U); 2363b2e2a59SMatt Arsenault if (!OpC) 2373b2e2a59SMatt Arsenault continue; 2383b2e2a59SMatt Arsenault 239*6b93046fSMatt Arsenault if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) { 240*6b93046fSMatt Arsenault NeedQueuePtr = true; 241*6b93046fSMatt Arsenault break; 242*6b93046fSMatt Arsenault } 2433b2e2a59SMatt Arsenault } 24499c14524SMatt Arsenault } 24599c14524SMatt Arsenault } 24699c14524SMatt Arsenault 247*6b93046fSMatt Arsenault if (NeedQueuePtr) { 248*6b93046fSMatt Arsenault F.addFnAttr("amdgpu-queue-ptr"); 249*6b93046fSMatt Arsenault Changed = true; 25099c14524SMatt Arsenault } 2513931948bSMatt Arsenault 252*6b93046fSMatt Arsenault return Changed; 253*6b93046fSMatt Arsenault } 254*6b93046fSMatt Arsenault 255*6b93046fSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin, 2563931948bSMatt Arsenault StringRef AttrName) { 2573931948bSMatt Arsenault SmallPtrSet<Function *, 4> SeenFuncs; 2583931948bSMatt Arsenault 259*6b93046fSMatt Arsenault for (User *U : Intrin.users()) { 2603931948bSMatt Arsenault // CallInst is the only valid user for an intrinsic. 2613931948bSMatt Arsenault CallInst *CI = cast<CallInst>(U); 2623931948bSMatt Arsenault 2633931948bSMatt Arsenault Function *CallingFunction = CI->getParent()->getParent(); 2643931948bSMatt Arsenault if (SeenFuncs.insert(CallingFunction).second) 2653931948bSMatt Arsenault CallingFunction->addFnAttr(AttrName); 2663931948bSMatt Arsenault } 2673931948bSMatt Arsenault } 2683931948bSMatt Arsenault 2693931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( 2703931948bSMatt Arsenault Module &M, 2713931948bSMatt Arsenault ArrayRef<StringRef[2]> IntrinsicToAttr) { 2723931948bSMatt Arsenault bool Changed = false; 2733931948bSMatt Arsenault 2743931948bSMatt Arsenault for (const StringRef *Arr : IntrinsicToAttr) { 2753931948bSMatt Arsenault if (Function *Fn = M.getFunction(Arr[0])) { 276*6b93046fSMatt Arsenault addAttrToCallers(*Fn, Arr[1]); 2773931948bSMatt Arsenault Changed = true; 2783931948bSMatt Arsenault } 2793931948bSMatt Arsenault } 2803931948bSMatt Arsenault 2813931948bSMatt Arsenault return Changed; 2823931948bSMatt Arsenault } 2833931948bSMatt Arsenault 284*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 285*6b93046fSMatt Arsenault Module &M = SCC.getCallGraph().getModule(); 2863931948bSMatt Arsenault Triple TT(M.getTargetTriple()); 2873931948bSMatt Arsenault 288*6b93046fSMatt Arsenault bool Changed = false; 289*6b93046fSMatt Arsenault for (CallGraphNode *I : SCC) { 290*6b93046fSMatt Arsenault Function *F = I->getFunction(); 291*6b93046fSMatt Arsenault if (!F || F->isDeclaration()) 29299c14524SMatt Arsenault continue; 29399c14524SMatt Arsenault 294*6b93046fSMatt Arsenault Changed |= addFeatureAttributes(*F); 29599c14524SMatt Arsenault } 296*6b93046fSMatt Arsenault 29799c14524SMatt Arsenault 2983931948bSMatt Arsenault return Changed; 2993931948bSMatt Arsenault } 3003931948bSMatt Arsenault 301*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 302*6b93046fSMatt Arsenault auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 303*6b93046fSMatt Arsenault if (!TPC) 304*6b93046fSMatt Arsenault report_fatal_error("TargetMachine is required"); 305*6b93046fSMatt Arsenault 306*6b93046fSMatt Arsenault AS = AMDGPU::getAMDGPUAS(CG.getModule()); 307*6b93046fSMatt Arsenault TM = &TPC->getTM<TargetMachine>(); 308*6b93046fSMatt Arsenault return false; 309*6b93046fSMatt Arsenault } 310*6b93046fSMatt Arsenault 311*6b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 3128b61764cSFrancis Visoiu Mistrih return new AMDGPUAnnotateKernelFeatures(); 3133931948bSMatt Arsenault } 314