//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  AMDGPUAS AS;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    AMDGPUAS AS);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
  return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
    const AMDGPUAS &AS) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}

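// Return true if this constant expression requires the queue pointer, i.e. it
// is an addrspacecast out of the local or private address space.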
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
    AMDGPUAS AS) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS, AS);
  }

  return false;
}

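// Walk the operand tree of EntryC with an explicit stack and check every
// nested ConstantExpr. ConstantExprVisited is shared across calls so each
// constant is only inspected once per function.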
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
  const Constant *EntryC,
  SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
  AMDGPUAS AS) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (visitConstantExpr(CE, AS))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id for kernels, because
// they are always initialized there; they are only tracked for callable
// functions (NonKernelOnly).
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

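// Add the named attribute to the parent if the callee carries it. Returns true
// if the attribute was copied.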
static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }

  return false;
}

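// Propagate the implicit argument attributes from a callee up to its caller.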
static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // The x ids are propagated to kernels as well, even though kernels do not
  // need the attribute.
  static const StringRef AttrNames[] = {
    { "amdgpu-work-item-id-x" },
    { "amdgpu-work-item-id-y" },
    { "amdgpu-work-item-id-z" },
    { "amdgpu-work-group-id-x" },
    { "amdgpu-work-group-id-y" },
    { "amdgpu-work-group-id-z" },
    { "amdgpu-dispatch-ptr" },
    { "amdgpu-dispatch-id" },
    { "amdgpu-kernarg-segment-ptr" },
    { "amdgpu-implicitarg-ptr" }
  };

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

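// Scan the body of F for calls and addrspacecasts that imply a need for
// implicit inputs, and record each requirement as a function attribute.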
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
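  // True for callable functions, false for entry points (kernels).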
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

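      // If the queue pointer is already known to be needed, or the target has
      // aperture registers and never needs it, skip the addrspacecast checks.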
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC, AS)) {
          NeedQueuePtr = true;
          continue;
        }
      }

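      // Addrspacecasts can also be hidden inside constant expression operands,
      // so check those as well.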
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}

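// The CallGraphSCCPass framework visits SCCs bottom-up, so callee attributes
// are already in place when their callers are processed here. Declarations
// have no body to scan and are skipped.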
bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  Module &M = SCC.getCallGraph().getModule();
  Triple TT(M.getTargetTriple());

  bool Changed = false;
  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    if (!F || F->isDeclaration())
      continue;

    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

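// The TargetMachine is needed to query subtarget features, so this pass can
// only run inside a codegen pipeline that provides a TargetPassConfig.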
bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  AS = AMDGPU::getAMDGPUAS(CG.getModule());
  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}