13931948bSMatt Arsenault //===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
23931948bSMatt Arsenault //
33931948bSMatt Arsenault //                     The LLVM Compiler Infrastructure
43931948bSMatt Arsenault //
53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source
63931948bSMatt Arsenault // License. See LICENSE.TXT for details.
73931948bSMatt Arsenault //
83931948bSMatt Arsenault //===----------------------------------------------------------------------===//
93931948bSMatt Arsenault //
103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics
113931948bSMatt Arsenault /// which will impact calling convention lowering.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h"
172ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h"
186b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h"
198b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h"
203b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
216b93046fSMatt Arsenault #include "llvm/IR/InstIterator.h"
223931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
233931948bSMatt Arsenault #include "llvm/IR/Module.h"
243931948bSMatt Arsenault 
253931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
263931948bSMatt Arsenault 
273931948bSMatt Arsenault using namespace llvm;
283931948bSMatt Arsenault 
293931948bSMatt Arsenault namespace {
303931948bSMatt Arsenault 
316b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
323931948bSMatt Arsenault private:
336b93046fSMatt Arsenault   const TargetMachine *TM = nullptr;
341a14bfa0SYaxun Liu   AMDGPUAS AS;
3599c14524SMatt Arsenault 
366b93046fSMatt Arsenault   bool addFeatureAttributes(Function &F);
376b93046fSMatt Arsenault 
386b93046fSMatt Arsenault   void addAttrToCallers(Function &Intrin, StringRef AttrName);
393931948bSMatt Arsenault   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
403931948bSMatt Arsenault 
413931948bSMatt Arsenault public:
423931948bSMatt Arsenault   static char ID;
433931948bSMatt Arsenault 
446b93046fSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
456b93046fSMatt Arsenault 
466b93046fSMatt Arsenault   bool doInitialization(CallGraph &CG) override;
476b93046fSMatt Arsenault   bool runOnSCC(CallGraphSCC &SCC) override;
48117296c0SMehdi Amini   StringRef getPassName() const override {
493931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
503931948bSMatt Arsenault   }
513931948bSMatt Arsenault 
523931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
533931948bSMatt Arsenault     AU.setPreservesAll();
546b93046fSMatt Arsenault     CallGraphSCCPass::getAnalysisUsage(AU);
553931948bSMatt Arsenault   }
563b2e2a59SMatt Arsenault 
571a14bfa0SYaxun Liu   static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
583b2e2a59SMatt Arsenault   static bool visitConstantExprsRecursively(
593b2e2a59SMatt Arsenault     const Constant *EntryC,
601a14bfa0SYaxun Liu     SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
611a14bfa0SYaxun Liu     AMDGPUAS AS);
623931948bSMatt Arsenault };
633931948bSMatt Arsenault 
643931948bSMatt Arsenault }
653931948bSMatt Arsenault 
// Definition of the static pass identifier declared in the class.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Reference to the pass identifier, exposed in the llvm namespace.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

// Registers the pass under the DEBUG_TYPE name with the description below.
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
723931948bSMatt Arsenault 
7399c14524SMatt Arsenault 
7499c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
751a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
761a14bfa0SYaxun Liu   return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
7799c14524SMatt Arsenault }
7899c14524SMatt Arsenault 
791a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
801a14bfa0SYaxun Liu     const AMDGPUAS &AS) {
811a14bfa0SYaxun Liu   return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
823b2e2a59SMatt Arsenault }
833b2e2a59SMatt Arsenault 
841a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
851a14bfa0SYaxun Liu     AMDGPUAS AS) {
863b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
873b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
881a14bfa0SYaxun Liu     return castRequiresQueuePtr(SrcAS, AS);
893b2e2a59SMatt Arsenault   }
903b2e2a59SMatt Arsenault 
913b2e2a59SMatt Arsenault   return false;
923b2e2a59SMatt Arsenault }
933b2e2a59SMatt Arsenault 
943b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
953b2e2a59SMatt Arsenault   const Constant *EntryC,
961a14bfa0SYaxun Liu   SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
971a14bfa0SYaxun Liu   AMDGPUAS AS) {
983b2e2a59SMatt Arsenault 
993b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
1003b2e2a59SMatt Arsenault     return false;
1013b2e2a59SMatt Arsenault 
1023b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
1033b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
1043b2e2a59SMatt Arsenault 
1053b2e2a59SMatt Arsenault   while (!Stack.empty()) {
1063b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
1073b2e2a59SMatt Arsenault 
1083b2e2a59SMatt Arsenault     // Check this constant expression.
1093b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
1101a14bfa0SYaxun Liu       if (visitConstantExpr(CE, AS))
1113b2e2a59SMatt Arsenault         return true;
1123b2e2a59SMatt Arsenault     }
1133b2e2a59SMatt Arsenault 
1143b2e2a59SMatt Arsenault     // Visit all sub-expressions.
1153b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
1163b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
1173b2e2a59SMatt Arsenault       if (!OpC)
1183b2e2a59SMatt Arsenault         continue;
1193b2e2a59SMatt Arsenault 
1203b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
1213b2e2a59SMatt Arsenault         continue;
1223b2e2a59SMatt Arsenault 
1233b2e2a59SMatt Arsenault       Stack.push_back(OpC);
1243b2e2a59SMatt Arsenault     }
1253b2e2a59SMatt Arsenault   }
1263b2e2a59SMatt Arsenault 
1273b2e2a59SMatt Arsenault   return false;
1283b2e2a59SMatt Arsenault }
1293b2e2a59SMatt Arsenault 
1306b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always
1316b93046fSMatt Arsenault // initialized.
1326b93046fSMatt Arsenault //
1336b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup
1346b93046fSMatt Arsenault // size is 1 for y/z.
1356b93046fSMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) {
1366b93046fSMatt Arsenault   switch (ID) {
1376b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_y:
1386b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_y:
1396b93046fSMatt Arsenault     return "amdgpu-work-item-id-y";
1406b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_z:
1416b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_z:
1426b93046fSMatt Arsenault     return "amdgpu-work-item-id-z";
1436b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_y:
1446b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_y:
1456b93046fSMatt Arsenault     return "amdgpu-work-group-id-y";
1466b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_z:
1476b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_z:
1486b93046fSMatt Arsenault     return "amdgpu-work-group-id-z";
1496b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_ptr:
1506b93046fSMatt Arsenault     return "amdgpu-dispatch-ptr";
1516b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_id:
1526b93046fSMatt Arsenault     return "amdgpu-dispatch-id";
153*23e4df6aSMatt Arsenault   case Intrinsic::amdgcn_kernarg_segment_ptr:
154*23e4df6aSMatt Arsenault   case Intrinsic::amdgcn_implicitarg_ptr:
155*23e4df6aSMatt Arsenault     return "amdgpu-kernarg-segment-ptr";
1566b93046fSMatt Arsenault   case Intrinsic::amdgcn_queue_ptr:
1576b93046fSMatt Arsenault   case Intrinsic::trap:
1586b93046fSMatt Arsenault   case Intrinsic::debugtrap:
1596b93046fSMatt Arsenault     IsQueuePtr = true;
1606b93046fSMatt Arsenault     return "amdgpu-queue-ptr";
1616b93046fSMatt Arsenault   default:
1626b93046fSMatt Arsenault     return "";
1636b93046fSMatt Arsenault   }
1646b93046fSMatt Arsenault }
1656b93046fSMatt Arsenault 
1666b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee,
1676b93046fSMatt Arsenault                        StringRef Name) {
1686b93046fSMatt Arsenault   if (Callee.hasFnAttribute(Name)) {
1696b93046fSMatt Arsenault     Parent.addFnAttr(Name);
1706b93046fSMatt Arsenault     return true;
1716b93046fSMatt Arsenault   }
1726b93046fSMatt Arsenault 
1736b93046fSMatt Arsenault   return false;
1746b93046fSMatt Arsenault }
1756b93046fSMatt Arsenault 
1766b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
1776b93046fSMatt Arsenault                                    bool &NeedQueuePtr) {
1786b93046fSMatt Arsenault 
1796b93046fSMatt Arsenault   static const StringRef AttrNames[] = {
1806b93046fSMatt Arsenault     // .x omitted
1816b93046fSMatt Arsenault     { "amdgpu-work-item-id-y" },
1826b93046fSMatt Arsenault     { "amdgpu-work-item-id-z" },
1836b93046fSMatt Arsenault     // .x omitted
1846b93046fSMatt Arsenault     { "amdgpu-work-group-id-y" },
1856b93046fSMatt Arsenault     { "amdgpu-work-group-id-z" },
1866b93046fSMatt Arsenault     { "amdgpu-dispatch-ptr" },
187*23e4df6aSMatt Arsenault     { "amdgpu-dispatch-id" },
188*23e4df6aSMatt Arsenault     { "amdgpu-kernarg-segment-ptr" }
1896b93046fSMatt Arsenault   };
1906b93046fSMatt Arsenault 
1916b93046fSMatt Arsenault   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
1926b93046fSMatt Arsenault     NeedQueuePtr = true;
1936b93046fSMatt Arsenault 
1946b93046fSMatt Arsenault   for (StringRef AttrName : AttrNames)
1956b93046fSMatt Arsenault     handleAttr(Parent, Callee, AttrName);
1966b93046fSMatt Arsenault }
1976b93046fSMatt Arsenault 
1986b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
1996b93046fSMatt Arsenault   bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
2003b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
2013b2e2a59SMatt Arsenault 
2026b93046fSMatt Arsenault   bool Changed = false;
2036b93046fSMatt Arsenault   bool NeedQueuePtr = false;
2046b93046fSMatt Arsenault 
2056b93046fSMatt Arsenault   for (BasicBlock &BB : F) {
2066b93046fSMatt Arsenault     for (Instruction &I : BB) {
2076b93046fSMatt Arsenault       CallSite CS(&I);
2086b93046fSMatt Arsenault       if (CS) {
2096b93046fSMatt Arsenault         Function *Callee = CS.getCalledFunction();
2106b93046fSMatt Arsenault 
2116b93046fSMatt Arsenault         // TODO: Do something with indirect calls.
2126b93046fSMatt Arsenault         if (!Callee)
2136b93046fSMatt Arsenault           continue;
2146b93046fSMatt Arsenault 
2156b93046fSMatt Arsenault         Intrinsic::ID IID = Callee->getIntrinsicID();
2166b93046fSMatt Arsenault         if (IID == Intrinsic::not_intrinsic) {
2176b93046fSMatt Arsenault           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
2186b93046fSMatt Arsenault           Changed = true;
2196b93046fSMatt Arsenault         } else {
2206b93046fSMatt Arsenault           StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr);
2216b93046fSMatt Arsenault           if (!AttrName.empty()) {
2226b93046fSMatt Arsenault             F.addFnAttr(AttrName);
2236b93046fSMatt Arsenault             Changed = true;
2246b93046fSMatt Arsenault           }
2256b93046fSMatt Arsenault         }
2266b93046fSMatt Arsenault       }
2276b93046fSMatt Arsenault 
2286b93046fSMatt Arsenault       if (NeedQueuePtr || HasApertureRegs)
2296b93046fSMatt Arsenault         continue;
2306b93046fSMatt Arsenault 
23199c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
2326b93046fSMatt Arsenault         if (castRequiresQueuePtr(ASC, AS)) {
2336b93046fSMatt Arsenault           NeedQueuePtr = true;
2346b93046fSMatt Arsenault           continue;
2356b93046fSMatt Arsenault         }
23699c14524SMatt Arsenault       }
2373b2e2a59SMatt Arsenault 
2383b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
2393b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
2403b2e2a59SMatt Arsenault         if (!OpC)
2413b2e2a59SMatt Arsenault           continue;
2423b2e2a59SMatt Arsenault 
2436b93046fSMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
2446b93046fSMatt Arsenault           NeedQueuePtr = true;
2456b93046fSMatt Arsenault           break;
2466b93046fSMatt Arsenault         }
2473b2e2a59SMatt Arsenault       }
24899c14524SMatt Arsenault     }
24999c14524SMatt Arsenault   }
25099c14524SMatt Arsenault 
2516b93046fSMatt Arsenault   if (NeedQueuePtr) {
2526b93046fSMatt Arsenault     F.addFnAttr("amdgpu-queue-ptr");
2536b93046fSMatt Arsenault     Changed = true;
25499c14524SMatt Arsenault   }
2553931948bSMatt Arsenault 
2566b93046fSMatt Arsenault   return Changed;
2576b93046fSMatt Arsenault }
2586b93046fSMatt Arsenault 
2596b93046fSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin,
2603931948bSMatt Arsenault                                                     StringRef AttrName) {
2613931948bSMatt Arsenault   SmallPtrSet<Function *, 4> SeenFuncs;
2623931948bSMatt Arsenault 
2636b93046fSMatt Arsenault   for (User *U : Intrin.users()) {
2643931948bSMatt Arsenault     // CallInst is the only valid user for an intrinsic.
2653931948bSMatt Arsenault     CallInst *CI = cast<CallInst>(U);
2663931948bSMatt Arsenault 
2673931948bSMatt Arsenault     Function *CallingFunction = CI->getParent()->getParent();
2683931948bSMatt Arsenault     if (SeenFuncs.insert(CallingFunction).second)
2693931948bSMatt Arsenault       CallingFunction->addFnAttr(AttrName);
2703931948bSMatt Arsenault   }
2713931948bSMatt Arsenault }
2723931948bSMatt Arsenault 
2733931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
2743931948bSMatt Arsenault   Module &M,
2753931948bSMatt Arsenault   ArrayRef<StringRef[2]> IntrinsicToAttr) {
2763931948bSMatt Arsenault   bool Changed = false;
2773931948bSMatt Arsenault 
2783931948bSMatt Arsenault   for (const StringRef *Arr  : IntrinsicToAttr) {
2793931948bSMatt Arsenault     if (Function *Fn = M.getFunction(Arr[0])) {
2806b93046fSMatt Arsenault       addAttrToCallers(*Fn, Arr[1]);
2813931948bSMatt Arsenault       Changed = true;
2823931948bSMatt Arsenault     }
2833931948bSMatt Arsenault   }
2843931948bSMatt Arsenault 
2853931948bSMatt Arsenault   return Changed;
2863931948bSMatt Arsenault }
2873931948bSMatt Arsenault 
2886b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
2896b93046fSMatt Arsenault   Module &M = SCC.getCallGraph().getModule();
2903931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
2913931948bSMatt Arsenault 
2926b93046fSMatt Arsenault   bool Changed = false;
2936b93046fSMatt Arsenault   for (CallGraphNode *I : SCC) {
2946b93046fSMatt Arsenault     Function *F = I->getFunction();
2956b93046fSMatt Arsenault     if (!F || F->isDeclaration())
29699c14524SMatt Arsenault       continue;
29799c14524SMatt Arsenault 
2986b93046fSMatt Arsenault     Changed |= addFeatureAttributes(*F);
29999c14524SMatt Arsenault   }
3006b93046fSMatt Arsenault 
30199c14524SMatt Arsenault 
3023931948bSMatt Arsenault   return Changed;
3033931948bSMatt Arsenault }
3043931948bSMatt Arsenault 
3056b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
3066b93046fSMatt Arsenault   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
3076b93046fSMatt Arsenault   if (!TPC)
3086b93046fSMatt Arsenault     report_fatal_error("TargetMachine is required");
3096b93046fSMatt Arsenault 
3106b93046fSMatt Arsenault   AS = AMDGPU::getAMDGPUAS(CG.getModule());
3116b93046fSMatt Arsenault   TM = &TPC->getTM<TargetMachine>();
3126b93046fSMatt Arsenault   return false;
3136b93046fSMatt Arsenault }
3146b93046fSMatt Arsenault 
3156b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
3168b61764cSFrancis Visoiu Mistrih   return new AMDGPUAnnotateKernelFeatures();
3173931948bSMatt Arsenault }
318