//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16e823d92fSMatt Arsenault #include "AMDGPUSubtarget.h"
172ffe8fd2SMatt Arsenault #include "llvm/ADT/Triple.h"
18*6b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h"
198b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h"
203b2e2a59SMatt Arsenault #include "llvm/IR/Constants.h"
21*6b93046fSMatt Arsenault #include "llvm/IR/InstIterator.h"
223931948bSMatt Arsenault #include "llvm/IR/Instructions.h"
233931948bSMatt Arsenault #include "llvm/IR/Module.h"
243931948bSMatt Arsenault 
253931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
263931948bSMatt Arsenault 
273931948bSMatt Arsenault using namespace llvm;
283931948bSMatt Arsenault 
293931948bSMatt Arsenault namespace {
303931948bSMatt Arsenault 
31*6b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
323931948bSMatt Arsenault private:
33*6b93046fSMatt Arsenault   const TargetMachine *TM = nullptr;
341a14bfa0SYaxun Liu   AMDGPUAS AS;
3599c14524SMatt Arsenault 
36*6b93046fSMatt Arsenault   bool addFeatureAttributes(Function &F);
37*6b93046fSMatt Arsenault 
38*6b93046fSMatt Arsenault   void addAttrToCallers(Function &Intrin, StringRef AttrName);
393931948bSMatt Arsenault   bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
403931948bSMatt Arsenault 
413931948bSMatt Arsenault public:
423931948bSMatt Arsenault   static char ID;
433931948bSMatt Arsenault 
44*6b93046fSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
45*6b93046fSMatt Arsenault 
46*6b93046fSMatt Arsenault   bool doInitialization(CallGraph &CG) override;
47*6b93046fSMatt Arsenault   bool runOnSCC(CallGraphSCC &SCC) override;
48117296c0SMehdi Amini   StringRef getPassName() const override {
493931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
503931948bSMatt Arsenault   }
513931948bSMatt Arsenault 
523931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
533931948bSMatt Arsenault     AU.setPreservesAll();
54*6b93046fSMatt Arsenault     CallGraphSCCPass::getAnalysisUsage(AU);
553931948bSMatt Arsenault   }
563b2e2a59SMatt Arsenault 
571a14bfa0SYaxun Liu   static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
583b2e2a59SMatt Arsenault   static bool visitConstantExprsRecursively(
593b2e2a59SMatt Arsenault     const Constant *EntryC,
601a14bfa0SYaxun Liu     SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
611a14bfa0SYaxun Liu     AMDGPUAS AS);
623931948bSMatt Arsenault };
633931948bSMatt Arsenault 
643931948bSMatt Arsenault }
653931948bSMatt Arsenault 
663931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0;
673931948bSMatt Arsenault 
683931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
693931948bSMatt Arsenault 
7099c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
713931948bSMatt Arsenault                 "Add AMDGPU function attributes", false, false)
723931948bSMatt Arsenault 
7399c14524SMatt Arsenault 
7499c14524SMatt Arsenault // The queue ptr is only needed when casting to flat, not from it.
751a14bfa0SYaxun Liu static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
761a14bfa0SYaxun Liu   return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
7799c14524SMatt Arsenault }
7899c14524SMatt Arsenault 
791a14bfa0SYaxun Liu static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
801a14bfa0SYaxun Liu     const AMDGPUAS &AS) {
811a14bfa0SYaxun Liu   return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
823b2e2a59SMatt Arsenault }
833b2e2a59SMatt Arsenault 
841a14bfa0SYaxun Liu bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
851a14bfa0SYaxun Liu     AMDGPUAS AS) {
863b2e2a59SMatt Arsenault   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
873b2e2a59SMatt Arsenault     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
881a14bfa0SYaxun Liu     return castRequiresQueuePtr(SrcAS, AS);
893b2e2a59SMatt Arsenault   }
903b2e2a59SMatt Arsenault 
913b2e2a59SMatt Arsenault   return false;
923b2e2a59SMatt Arsenault }
933b2e2a59SMatt Arsenault 
943b2e2a59SMatt Arsenault bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
953b2e2a59SMatt Arsenault   const Constant *EntryC,
961a14bfa0SYaxun Liu   SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
971a14bfa0SYaxun Liu   AMDGPUAS AS) {
983b2e2a59SMatt Arsenault 
993b2e2a59SMatt Arsenault   if (!ConstantExprVisited.insert(EntryC).second)
1003b2e2a59SMatt Arsenault     return false;
1013b2e2a59SMatt Arsenault 
1023b2e2a59SMatt Arsenault   SmallVector<const Constant *, 16> Stack;
1033b2e2a59SMatt Arsenault   Stack.push_back(EntryC);
1043b2e2a59SMatt Arsenault 
1053b2e2a59SMatt Arsenault   while (!Stack.empty()) {
1063b2e2a59SMatt Arsenault     const Constant *C = Stack.pop_back_val();
1073b2e2a59SMatt Arsenault 
1083b2e2a59SMatt Arsenault     // Check this constant expression.
1093b2e2a59SMatt Arsenault     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
1101a14bfa0SYaxun Liu       if (visitConstantExpr(CE, AS))
1113b2e2a59SMatt Arsenault         return true;
1123b2e2a59SMatt Arsenault     }
1133b2e2a59SMatt Arsenault 
1143b2e2a59SMatt Arsenault     // Visit all sub-expressions.
1153b2e2a59SMatt Arsenault     for (const Use &U : C->operands()) {
1163b2e2a59SMatt Arsenault       const auto *OpC = dyn_cast<Constant>(U);
1173b2e2a59SMatt Arsenault       if (!OpC)
1183b2e2a59SMatt Arsenault         continue;
1193b2e2a59SMatt Arsenault 
1203b2e2a59SMatt Arsenault       if (!ConstantExprVisited.insert(OpC).second)
1213b2e2a59SMatt Arsenault         continue;
1223b2e2a59SMatt Arsenault 
1233b2e2a59SMatt Arsenault       Stack.push_back(OpC);
1243b2e2a59SMatt Arsenault     }
1253b2e2a59SMatt Arsenault   }
1263b2e2a59SMatt Arsenault 
1273b2e2a59SMatt Arsenault   return false;
1283b2e2a59SMatt Arsenault }
1293b2e2a59SMatt Arsenault 
130*6b93046fSMatt Arsenault // We do not need to note the x workitem or workgroup id because they are always
131*6b93046fSMatt Arsenault // initialized.
132*6b93046fSMatt Arsenault //
133*6b93046fSMatt Arsenault // TODO: We should not add the attributes if the known compile time workgroup
134*6b93046fSMatt Arsenault // size is 1 for y/z.
135*6b93046fSMatt Arsenault static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &IsQueuePtr) {
136*6b93046fSMatt Arsenault   switch (ID) {
137*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_y:
138*6b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_y:
139*6b93046fSMatt Arsenault     return "amdgpu-work-item-id-y";
140*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_workitem_id_z:
141*6b93046fSMatt Arsenault   case Intrinsic::r600_read_tidig_z:
142*6b93046fSMatt Arsenault     return "amdgpu-work-item-id-z";
143*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_y:
144*6b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_y:
145*6b93046fSMatt Arsenault     return "amdgpu-work-group-id-y";
146*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_workgroup_id_z:
147*6b93046fSMatt Arsenault   case Intrinsic::r600_read_tgid_z:
148*6b93046fSMatt Arsenault     return "amdgpu-work-group-id-z";
149*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_ptr:
150*6b93046fSMatt Arsenault     return "amdgpu-dispatch-ptr";
151*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_dispatch_id:
152*6b93046fSMatt Arsenault     return "amdgpu-dispatch-id";
153*6b93046fSMatt Arsenault   case Intrinsic::amdgcn_queue_ptr:
154*6b93046fSMatt Arsenault   case Intrinsic::trap:
155*6b93046fSMatt Arsenault   case Intrinsic::debugtrap:
156*6b93046fSMatt Arsenault     IsQueuePtr = true;
157*6b93046fSMatt Arsenault     return "amdgpu-queue-ptr";
158*6b93046fSMatt Arsenault   default:
159*6b93046fSMatt Arsenault     return "";
160*6b93046fSMatt Arsenault   }
161*6b93046fSMatt Arsenault }
162*6b93046fSMatt Arsenault 
163*6b93046fSMatt Arsenault static bool handleAttr(Function &Parent, const Function &Callee,
164*6b93046fSMatt Arsenault                        StringRef Name) {
165*6b93046fSMatt Arsenault   if (Callee.hasFnAttribute(Name)) {
166*6b93046fSMatt Arsenault     Parent.addFnAttr(Name);
167*6b93046fSMatt Arsenault     return true;
168*6b93046fSMatt Arsenault   }
169*6b93046fSMatt Arsenault 
170*6b93046fSMatt Arsenault   return false;
171*6b93046fSMatt Arsenault }
172*6b93046fSMatt Arsenault 
173*6b93046fSMatt Arsenault static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
174*6b93046fSMatt Arsenault                                    bool &NeedQueuePtr) {
175*6b93046fSMatt Arsenault 
176*6b93046fSMatt Arsenault   static const StringRef AttrNames[] = {
177*6b93046fSMatt Arsenault     // .x omitted
178*6b93046fSMatt Arsenault     { "amdgpu-work-item-id-y" },
179*6b93046fSMatt Arsenault     { "amdgpu-work-item-id-z" },
180*6b93046fSMatt Arsenault     // .x omitted
181*6b93046fSMatt Arsenault     { "amdgpu-work-group-id-y" },
182*6b93046fSMatt Arsenault     { "amdgpu-work-group-id-z" },
183*6b93046fSMatt Arsenault     { "amdgpu-dispatch-ptr" },
184*6b93046fSMatt Arsenault     { "amdgpu-dispatch-id" }
185*6b93046fSMatt Arsenault   };
186*6b93046fSMatt Arsenault 
187*6b93046fSMatt Arsenault   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
188*6b93046fSMatt Arsenault     NeedQueuePtr = true;
189*6b93046fSMatt Arsenault 
190*6b93046fSMatt Arsenault   for (StringRef AttrName : AttrNames)
191*6b93046fSMatt Arsenault     handleAttr(Parent, Callee, AttrName);
192*6b93046fSMatt Arsenault }
193*6b93046fSMatt Arsenault 
194*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
195*6b93046fSMatt Arsenault   bool HasApertureRegs = TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
1963b2e2a59SMatt Arsenault   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
1973b2e2a59SMatt Arsenault 
198*6b93046fSMatt Arsenault   bool Changed = false;
199*6b93046fSMatt Arsenault   bool NeedQueuePtr = false;
200*6b93046fSMatt Arsenault 
201*6b93046fSMatt Arsenault   for (BasicBlock &BB : F) {
202*6b93046fSMatt Arsenault     for (Instruction &I : BB) {
203*6b93046fSMatt Arsenault       CallSite CS(&I);
204*6b93046fSMatt Arsenault       if (CS) {
205*6b93046fSMatt Arsenault         Function *Callee = CS.getCalledFunction();
206*6b93046fSMatt Arsenault 
207*6b93046fSMatt Arsenault         // TODO: Do something with indirect calls.
208*6b93046fSMatt Arsenault         if (!Callee)
209*6b93046fSMatt Arsenault           continue;
210*6b93046fSMatt Arsenault 
211*6b93046fSMatt Arsenault         Intrinsic::ID IID = Callee->getIntrinsicID();
212*6b93046fSMatt Arsenault         if (IID == Intrinsic::not_intrinsic) {
213*6b93046fSMatt Arsenault           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
214*6b93046fSMatt Arsenault           Changed = true;
215*6b93046fSMatt Arsenault         } else {
216*6b93046fSMatt Arsenault           StringRef AttrName = intrinsicToAttrName(IID, NeedQueuePtr);
217*6b93046fSMatt Arsenault           if (!AttrName.empty()) {
218*6b93046fSMatt Arsenault             F.addFnAttr(AttrName);
219*6b93046fSMatt Arsenault             Changed = true;
220*6b93046fSMatt Arsenault           }
221*6b93046fSMatt Arsenault         }
222*6b93046fSMatt Arsenault       }
223*6b93046fSMatt Arsenault 
224*6b93046fSMatt Arsenault       if (NeedQueuePtr || HasApertureRegs)
225*6b93046fSMatt Arsenault         continue;
226*6b93046fSMatt Arsenault 
22799c14524SMatt Arsenault       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
228*6b93046fSMatt Arsenault         if (castRequiresQueuePtr(ASC, AS)) {
229*6b93046fSMatt Arsenault           NeedQueuePtr = true;
230*6b93046fSMatt Arsenault           continue;
231*6b93046fSMatt Arsenault         }
23299c14524SMatt Arsenault       }
2333b2e2a59SMatt Arsenault 
2343b2e2a59SMatt Arsenault       for (const Use &U : I.operands()) {
2353b2e2a59SMatt Arsenault         const auto *OpC = dyn_cast<Constant>(U);
2363b2e2a59SMatt Arsenault         if (!OpC)
2373b2e2a59SMatt Arsenault           continue;
2383b2e2a59SMatt Arsenault 
239*6b93046fSMatt Arsenault         if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) {
240*6b93046fSMatt Arsenault           NeedQueuePtr = true;
241*6b93046fSMatt Arsenault           break;
242*6b93046fSMatt Arsenault         }
2433b2e2a59SMatt Arsenault       }
24499c14524SMatt Arsenault     }
24599c14524SMatt Arsenault   }
24699c14524SMatt Arsenault 
247*6b93046fSMatt Arsenault   if (NeedQueuePtr) {
248*6b93046fSMatt Arsenault     F.addFnAttr("amdgpu-queue-ptr");
249*6b93046fSMatt Arsenault     Changed = true;
25099c14524SMatt Arsenault   }
2513931948bSMatt Arsenault 
252*6b93046fSMatt Arsenault   return Changed;
253*6b93046fSMatt Arsenault }
254*6b93046fSMatt Arsenault 
255*6b93046fSMatt Arsenault void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function &Intrin,
2563931948bSMatt Arsenault                                                     StringRef AttrName) {
2573931948bSMatt Arsenault   SmallPtrSet<Function *, 4> SeenFuncs;
2583931948bSMatt Arsenault 
259*6b93046fSMatt Arsenault   for (User *U : Intrin.users()) {
2603931948bSMatt Arsenault     // CallInst is the only valid user for an intrinsic.
2613931948bSMatt Arsenault     CallInst *CI = cast<CallInst>(U);
2623931948bSMatt Arsenault 
2633931948bSMatt Arsenault     Function *CallingFunction = CI->getParent()->getParent();
2643931948bSMatt Arsenault     if (SeenFuncs.insert(CallingFunction).second)
2653931948bSMatt Arsenault       CallingFunction->addFnAttr(AttrName);
2663931948bSMatt Arsenault   }
2673931948bSMatt Arsenault }
2683931948bSMatt Arsenault 
2693931948bSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
2703931948bSMatt Arsenault   Module &M,
2713931948bSMatt Arsenault   ArrayRef<StringRef[2]> IntrinsicToAttr) {
2723931948bSMatt Arsenault   bool Changed = false;
2733931948bSMatt Arsenault 
2743931948bSMatt Arsenault   for (const StringRef *Arr  : IntrinsicToAttr) {
2753931948bSMatt Arsenault     if (Function *Fn = M.getFunction(Arr[0])) {
276*6b93046fSMatt Arsenault       addAttrToCallers(*Fn, Arr[1]);
2773931948bSMatt Arsenault       Changed = true;
2783931948bSMatt Arsenault     }
2793931948bSMatt Arsenault   }
2803931948bSMatt Arsenault 
2813931948bSMatt Arsenault   return Changed;
2823931948bSMatt Arsenault }
2833931948bSMatt Arsenault 
284*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
285*6b93046fSMatt Arsenault   Module &M = SCC.getCallGraph().getModule();
2863931948bSMatt Arsenault   Triple TT(M.getTargetTriple());
2873931948bSMatt Arsenault 
288*6b93046fSMatt Arsenault   bool Changed = false;
289*6b93046fSMatt Arsenault   for (CallGraphNode *I : SCC) {
290*6b93046fSMatt Arsenault     Function *F = I->getFunction();
291*6b93046fSMatt Arsenault     if (!F || F->isDeclaration())
29299c14524SMatt Arsenault       continue;
29399c14524SMatt Arsenault 
294*6b93046fSMatt Arsenault     Changed |= addFeatureAttributes(*F);
29599c14524SMatt Arsenault   }
296*6b93046fSMatt Arsenault 
29799c14524SMatt Arsenault 
2983931948bSMatt Arsenault   return Changed;
2993931948bSMatt Arsenault }
3003931948bSMatt Arsenault 
301*6b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
302*6b93046fSMatt Arsenault   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
303*6b93046fSMatt Arsenault   if (!TPC)
304*6b93046fSMatt Arsenault     report_fatal_error("TargetMachine is required");
305*6b93046fSMatt Arsenault 
306*6b93046fSMatt Arsenault   AS = AMDGPU::getAMDGPUAS(CG.getModule());
307*6b93046fSMatt Arsenault   TM = &TPC->getTM<TargetMachine>();
308*6b93046fSMatt Arsenault   return false;
309*6b93046fSMatt Arsenault }
310*6b93046fSMatt Arsenault 
311*6b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
3128b61764cSFrancis Visoiu Mistrih   return new AMDGPUAnnotateKernelFeatures();
3133931948bSMatt Arsenault }
314