1*fa6434beSEugene Zelenko //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
23931948bSMatt Arsenault //
33931948bSMatt Arsenault //                     The LLVM Compiler Infrastructure
43931948bSMatt Arsenault //
53931948bSMatt Arsenault // This file is distributed under the University of Illinois Open Source
63931948bSMatt Arsenault // License. See LICENSE.TXT for details.
73931948bSMatt Arsenault //
83931948bSMatt Arsenault //===----------------------------------------------------------------------===//
93931948bSMatt Arsenault //
103931948bSMatt Arsenault /// \file This pass adds target attributes to functions which use intrinsics
113931948bSMatt Arsenault /// which will impact calling convention lowering.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h"
17*fa6434beSEugene Zelenko #include "Utils/AMDGPUBaseInfo.h"
18*fa6434beSEugene Zelenko #include "llvm/ADT/SmallPtrSet.h"
19*fa6434beSEugene Zelenko #include "llvm/ADT/SmallVector.h"
20*fa6434beSEugene Zelenko #include "llvm/ADT/StringRef.h"
212ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h"
22*fa6434beSEugene Zelenko #include "llvm/Analysis/CallGraph.h"
236b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h"
248b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h"
25*fa6434beSEugene Zelenko #include "llvm/IR/CallSite.h"
26*fa6434beSEugene Zelenko #include "llvm/IR/Constant.h"
273b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
28*fa6434beSEugene Zelenko #include "llvm/IR/Function.h"
29*fa6434beSEugene Zelenko #include "llvm/IR/Instruction.h"
303931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
31*fa6434beSEugene Zelenko #include "llvm/IR/Intrinsics.h"
323931948bSMatt Arsenault #include "llvm/IR/Module.h"
33*fa6434beSEugene Zelenko #include "llvm/IR/Type.h"
34*fa6434beSEugene Zelenko #include "llvm/IR/Use.h"
35*fa6434beSEugene Zelenko #include "llvm/Pass.h"
36*fa6434beSEugene Zelenko #include "llvm/Support/Casting.h"
37*fa6434beSEugene Zelenko #include "llvm/Support/ErrorHandling.h"
38*fa6434beSEugene Zelenko #include "llvm/Target/TargetMachine.h"
393931948bSMatt Arsenault 
403931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
413931948bSMatt Arsenault 
423931948bSMatt Arsenault using namespace llvm;
433931948bSMatt Arsenault 
443931948bSMatt Arsenault namespace {
453931948bSMatt Arsenault 
466b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
473931948bSMatt Arsenault private:
486b93046fSMatt Arsenault   const TargetMachine *TM = nullptr;
491a14bfa0SYaxun Liu   AMDGPUAS AS;
5099c14524SMatt Arsenault 
516b93046fSMatt Arsenault   bool addFeatureAttributes(Function &F);
526b93046fSMatt Arsenault 
533931948bSMatt Arsenault public:
543931948bSMatt Arsenault   static char ID;
553931948bSMatt Arsenault 
566b93046fSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
576b93046fSMatt Arsenault 
586b93046fSMatt Arsenault   bool doInitialization(CallGraph &CG) override;
596b93046fSMatt Arsenault   bool runOnSCC(CallGraphSCC &SCC) override;
60*fa6434beSEugene Zelenko 
61117296c0SMehdi Amini   StringRef getPassName() const override {
623931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
633931948bSMatt Arsenault   }
643931948bSMatt Arsenault 
653931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
663931948bSMatt Arsenault     AU.setPreservesAll();
676b93046fSMatt Arsenault     CallGraphSCCPass::getAnalysisUsage(AU);
683931948bSMatt Arsenault   }
693b2e2a59SMatt Arsenault 
701a14bfa0SYaxun Liu   static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
713b2e2a59SMatt Arsenault   static bool visitConstantExprsRecursively(
723b2e2a59SMatt Arsenault     const Constant *EntryC,
731a14bfa0SYaxun Liu     SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
741a14bfa0SYaxun Liu     AMDGPUAS AS);
753931948bSMatt Arsenault };
763931948bSMatt Arsenault 
77*fa6434beSEugene Zelenko } // end anonymous namespace
783931948bSMatt Arsenault 
793931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0;
803931948bSMatt Arsenault 
813931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
823931948bSMatt Arsenault 
8399c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
843931948bSMatt Arsenault                 "Add AMDGPU function attributes", false, false)
853931948bSMatt Arsenault 
8699c14524SMatt Arsenault 
8799c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
881a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
891a14bfa0SYaxun Liu   return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
9099c14524SMatt Arsenault }
9199c14524SMatt Arsenault 
921a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
931a14bfa0SYaxun Liu     const AMDGPUAS &AS) {
941a14bfa0SYaxun Liu   return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
953b2e2a59SMatt Arsenault }
963b2e2a59SMatt Arsenault 
971a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
981a14bfa0SYaxun Liu     AMDGPUAS AS) {
993b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
1003b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
1011a14bfa0SYaxun Liu     return castRequiresQueuePtr(SrcAS, AS);
1023b2e2a59SMatt Arsenault   }
1033b2e2a59SMatt Arsenault 
1043b2e2a59SMatt Arsenault   return false;
1053b2e2a59SMatt Arsenault }
1063b2e2a59SMatt Arsenault 
1073b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1083b2e2a59SMatt Arsenault   const Constant *EntryC,
1091a14bfa0SYaxun Liu   SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
1101a14bfa0SYaxun Liu   AMDGPUAS AS) {
1113b2e2a59SMatt Arsenault 
1123b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
1133b2e2a59SMatt Arsenault     return false;
1143b2e2a59SMatt Arsenault 
1153b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
1163b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
1173b2e2a59SMatt Arsenault 
1183b2e2a59SMatt Arsenault   while (!Stack.empty()) {
1193b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
1203b2e2a59SMatt Arsenault 
1213b2e2a59SMatt Arsenault     // Check this constant expression.
1223b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
1231a14bfa0SYaxun Liu       if (visitConstantExpr(CE, AS))
1243b2e2a59SMatt Arsenault         return true;
1253b2e2a59SMatt Arsenault     }
1263b2e2a59SMatt Arsenault 
1273b2e2a59SMatt Arsenault     // Visit all sub-expressions.
1283b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
1293b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
1303b2e2a59SMatt Arsenault       if (!OpC)
1313b2e2a59SMatt Arsenault         continue;
1323b2e2a59SMatt Arsenault 
1333b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
1343b2e2a59SMatt Arsenault         continue;
1353b2e2a59SMatt Arsenault 
1363b2e2a59SMatt Arsenault       Stack.push_back(OpC);
1373b2e2a59SMatt Arsenault     }
1383b2e2a59SMatt Arsenault   }
1393b2e2a59SMatt Arsenault 
1403b2e2a59SMatt Arsenault   return false;
1413b2e2a59SMatt Arsenault }
1423b2e2a59SMatt Arsenault 
1436b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always
1446b93046fSMatt Arsenault // initialized.
1456b93046fSMatt Arsenault //
1466b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup
1476b93046fSMatt Arsenault // size is 1 for y/z.
148e15855d9SMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID,
149e15855d9SMatt Arsenault                                      bool &NonKernelOnly,
150e15855d9SMatt Arsenault                                      bool &IsQueuePtr) {
1516b93046fSMatt Arsenault   switch (ID) {
152e15855d9SMatt Arsenault   case Intrinsic::amdgcn_workitem_id_x:
153e15855d9SMatt Arsenault     NonKernelOnly = true;
154e15855d9SMatt Arsenault     return "amdgpu-work-item-id-x";
155e15855d9SMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_x:
156e15855d9SMatt Arsenault     NonKernelOnly = true;
157e15855d9SMatt Arsenault     return "amdgpu-work-group-id-x";
1586b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_y:
1596b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_y:
1606b93046fSMatt Arsenault     return "amdgpu-work-item-id-y";
1616b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_z:
1626b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_z:
1636b93046fSMatt Arsenault     return "amdgpu-work-item-id-z";
1646b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_y:
1656b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_y:
1666b93046fSMatt Arsenault     return "amdgpu-work-group-id-y";
1676b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_z:
1686b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_z:
1696b93046fSMatt Arsenault     return "amdgpu-work-group-id-z";
1706b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_ptr:
1716b93046fSMatt Arsenault     return "amdgpu-dispatch-ptr";
1726b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_id:
1736b93046fSMatt Arsenault     return "amdgpu-dispatch-id";
17423e4df6aSMatt Arsenault   case Intrinsic::amdgcn_kernarg_segment_ptr:
17523e4df6aSMatt Arsenault     return "amdgpu-kernarg-segment-ptr";
1769166ce86SMatt Arsenault   case Intrinsic::amdgcn_implicitarg_ptr:
1779166ce86SMatt Arsenault     return "amdgpu-implicitarg-ptr";
1786b93046fSMatt Arsenault   case Intrinsic::amdgcn_queue_ptr:
1796b93046fSMatt Arsenault   case Intrinsic::trap:
1806b93046fSMatt Arsenault   case Intrinsic::debugtrap:
1816b93046fSMatt Arsenault     IsQueuePtr = true;
1826b93046fSMatt Arsenault     return "amdgpu-queue-ptr";
1836b93046fSMatt Arsenault   default:
1846b93046fSMatt Arsenault     return "";
1856b93046fSMatt Arsenault   }
1866b93046fSMatt Arsenault }
1876b93046fSMatt Arsenault 
1886b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee,
1896b93046fSMatt Arsenault                        StringRef Name) {
1906b93046fSMatt Arsenault   if (Callee.hasFnAttribute(Name)) {
1916b93046fSMatt Arsenault     Parent.addFnAttr(Name);
1926b93046fSMatt Arsenault     return true;
1936b93046fSMatt Arsenault   }
1946b93046fSMatt Arsenault 
1956b93046fSMatt Arsenault   return false;
1966b93046fSMatt Arsenault }
1976b93046fSMatt Arsenault 
1986b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
1996b93046fSMatt Arsenault                                    bool &NeedQueuePtr) {
200e15855d9SMatt Arsenault   // X ids unnecessarily propagated to kernels.
2016b93046fSMatt Arsenault   static const StringRef AttrNames[] = {
202e15855d9SMatt Arsenault     { "amdgpu-work-item-id-x" },
2036b93046fSMatt Arsenault     { "amdgpu-work-item-id-y" },
2046b93046fSMatt Arsenault     { "amdgpu-work-item-id-z" },
205e15855d9SMatt Arsenault     { "amdgpu-work-group-id-x" },
2066b93046fSMatt Arsenault     { "amdgpu-work-group-id-y" },
2076b93046fSMatt Arsenault     { "amdgpu-work-group-id-z" },
2086b93046fSMatt Arsenault     { "amdgpu-dispatch-ptr" },
20923e4df6aSMatt Arsenault     { "amdgpu-dispatch-id" },
2109166ce86SMatt Arsenault     { "amdgpu-kernarg-segment-ptr" },
2119166ce86SMatt Arsenault     { "amdgpu-implicitarg-ptr" }
2126b93046fSMatt Arsenault   };
2136b93046fSMatt Arsenault 
2146b93046fSMatt Arsenault   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
2156b93046fSMatt Arsenault     NeedQueuePtr = true;
2166b93046fSMatt Arsenault 
2176b93046fSMatt Arsenault   for (StringRef AttrName : AttrNames)
2186b93046fSMatt Arsenault     handleAttr(Parent, Callee, AttrName);
2196b93046fSMatt Arsenault }
2206b93046fSMatt Arsenault 
2216b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
222254ad3deSMatt Arsenault   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
223254ad3deSMatt Arsenault   bool HasFlat = ST.hasFlatAddressSpace();
224254ad3deSMatt Arsenault   bool HasApertureRegs = ST.hasApertureRegs();
2253b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
2263b2e2a59SMatt Arsenault 
2276b93046fSMatt Arsenault   bool Changed = false;
2286b93046fSMatt Arsenault   bool NeedQueuePtr = false;
229254ad3deSMatt Arsenault   bool HaveCall = false;
230e15855d9SMatt Arsenault   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
2316b93046fSMatt Arsenault 
2326b93046fSMatt Arsenault   for (BasicBlock &BB : F) {
2336b93046fSMatt Arsenault     for (Instruction &I : BB) {
2346b93046fSMatt Arsenault       CallSite CS(&I);
2356b93046fSMatt Arsenault       if (CS) {
2366b93046fSMatt Arsenault         Function *Callee = CS.getCalledFunction();
2376b93046fSMatt Arsenault 
2386b93046fSMatt Arsenault         // TODO: Do something with indirect calls.
239254ad3deSMatt Arsenault         if (!Callee) {
240254ad3deSMatt Arsenault           if (!CS.isInlineAsm())
241254ad3deSMatt Arsenault             HaveCall = true;
2426b93046fSMatt Arsenault           continue;
243254ad3deSMatt Arsenault         }
2446b93046fSMatt Arsenault 
2456b93046fSMatt Arsenault         Intrinsic::ID IID = Callee->getIntrinsicID();
2466b93046fSMatt Arsenault         if (IID == Intrinsic::not_intrinsic) {
247254ad3deSMatt Arsenault           HaveCall = true;
2486b93046fSMatt Arsenault           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
2496b93046fSMatt Arsenault           Changed = true;
2506b93046fSMatt Arsenault         } else {
251e15855d9SMatt Arsenault           bool NonKernelOnly = false;
252e15855d9SMatt Arsenault           StringRef AttrName = intrinsicToAttrName(IID,
253e15855d9SMatt Arsenault                                                    NonKernelOnly, NeedQueuePtr);
254e15855d9SMatt Arsenault           if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
2556b93046fSMatt Arsenault             F.addFnAttr(AttrName);
2566b93046fSMatt Arsenault             Changed = true;
2576b93046fSMatt Arsenault           }
2586b93046fSMatt Arsenault         }
2596b93046fSMatt Arsenault       }
2606b93046fSMatt Arsenault 
2616b93046fSMatt Arsenault       if (NeedQueuePtr || HasApertureRegs)
2626b93046fSMatt Arsenault         continue;
2636b93046fSMatt Arsenault 
26499c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
2656b93046fSMatt Arsenault         if (castRequiresQueuePtr(ASC, AS)) {
2666b93046fSMatt Arsenault           NeedQueuePtr = true;
2676b93046fSMatt Arsenault           continue;
2686b93046fSMatt Arsenault         }
26999c14524SMatt Arsenault       }
2703b2e2a59SMatt Arsenault 
2713b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
2723b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
2733b2e2a59SMatt Arsenault         if (!OpC)
2743b2e2a59SMatt Arsenault           continue;
2753b2e2a59SMatt Arsenault 
2766b93046fSMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
2776b93046fSMatt Arsenault           NeedQueuePtr = true;
2786b93046fSMatt Arsenault           break;
2796b93046fSMatt Arsenault         }
2803b2e2a59SMatt Arsenault       }
28199c14524SMatt Arsenault     }
28299c14524SMatt Arsenault   }
28399c14524SMatt Arsenault 
2846b93046fSMatt Arsenault   if (NeedQueuePtr) {
2856b93046fSMatt Arsenault     F.addFnAttr("amdgpu-queue-ptr");
2866b93046fSMatt Arsenault     Changed = true;
28799c14524SMatt Arsenault   }
2883931948bSMatt Arsenault 
289254ad3deSMatt Arsenault   // TODO: We could refine this to captured pointers that could possibly be
290254ad3deSMatt Arsenault   // accessed by flat instructions. For now this is mostly a poor way of
291254ad3deSMatt Arsenault   // estimating whether there are calls before argument lowering.
292254ad3deSMatt Arsenault   if (HasFlat && !IsFunc && HaveCall) {
293254ad3deSMatt Arsenault     F.addFnAttr("amdgpu-flat-scratch");
294254ad3deSMatt Arsenault     Changed = true;
295254ad3deSMatt Arsenault   }
296254ad3deSMatt Arsenault 
2976b93046fSMatt Arsenault   return Changed;
2986b93046fSMatt Arsenault }
2996b93046fSMatt Arsenault 
3006b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
3016b93046fSMatt Arsenault   Module &M = SCC.getCallGraph().getModule();
3023931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
3033931948bSMatt Arsenault 
3046b93046fSMatt Arsenault   bool Changed = false;
3056b93046fSMatt Arsenault   for (CallGraphNode *I : SCC) {
3066b93046fSMatt Arsenault     Function *F = I->getFunction();
3076b93046fSMatt Arsenault     if (!F || F->isDeclaration())
30899c14524SMatt Arsenault       continue;
30999c14524SMatt Arsenault 
3106b93046fSMatt Arsenault     Changed |= addFeatureAttributes(*F);
31199c14524SMatt Arsenault   }
3126b93046fSMatt Arsenault 
3133931948bSMatt Arsenault   return Changed;
3143931948bSMatt Arsenault }
3153931948bSMatt Arsenault 
3166b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
3176b93046fSMatt Arsenault   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
3186b93046fSMatt Arsenault   if (!TPC)
3196b93046fSMatt Arsenault     report_fatal_error("TargetMachine is required");
3206b93046fSMatt Arsenault 
3216b93046fSMatt Arsenault   AS = AMDGPU::getAMDGPUAS(CG.getModule());
3226b93046fSMatt Arsenault   TM = &TPC->getTM<TargetMachine>();
3236b93046fSMatt Arsenault   return false;
3246b93046fSMatt Arsenault }
3256b93046fSMatt Arsenault 
3266b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
3278b61764cSFrancis Visoiu Mistrih   return new AMDGPUAnnotateKernelFeatures();
3283931948bSMatt Arsenault }
329