//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//
147d523365SDimitry Andric 
157d523365SDimitry Andric #include "AMDGPU.h"
167a7e6055SDimitry Andric #include "AMDGPUSubtarget.h"
172cab237bSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
182cab237bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
192cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
202cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
21d88c1a5aSDimitry Andric #include "llvm/ADT/Triple.h"
222cab237bSDimitry Andric #include "llvm/Analysis/CallGraph.h"
23b40b48b8SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
24d8866befSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
252cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
262cab237bSDimitry Andric #include "llvm/IR/Constant.h"
273ca95b02SDimitry Andric #include "llvm/IR/Constants.h"
282cab237bSDimitry Andric #include "llvm/IR/Function.h"
292cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
307d523365SDimitry Andric #include "llvm/IR/Instructions.h"
312cab237bSDimitry Andric #include "llvm/IR/Intrinsics.h"
327d523365SDimitry Andric #include "llvm/IR/Module.h"
332cab237bSDimitry Andric #include "llvm/IR/Type.h"
342cab237bSDimitry Andric #include "llvm/IR/Use.h"
352cab237bSDimitry Andric #include "llvm/Pass.h"
362cab237bSDimitry Andric #include "llvm/Support/Casting.h"
372cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
382cab237bSDimitry Andric #include "llvm/Target/TargetMachine.h"
397d523365SDimitry Andric 
407d523365SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
417d523365SDimitry Andric 
427d523365SDimitry Andric using namespace llvm;
437d523365SDimitry Andric 
447d523365SDimitry Andric namespace {
457d523365SDimitry Andric 
46b40b48b8SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
477d523365SDimitry Andric private:
48b40b48b8SDimitry Andric   const TargetMachine *TM = nullptr;
493ca95b02SDimitry Andric 
50b40b48b8SDimitry Andric   bool addFeatureAttributes(Function &F);
517d523365SDimitry Andric 
527d523365SDimitry Andric public:
537d523365SDimitry Andric   static char ID;
547d523365SDimitry Andric 
AMDGPUAnnotateKernelFeatures()55b40b48b8SDimitry Andric   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
56b40b48b8SDimitry Andric 
57b40b48b8SDimitry Andric   bool doInitialization(CallGraph &CG) override;
58b40b48b8SDimitry Andric   bool runOnSCC(CallGraphSCC &SCC) override;
592cab237bSDimitry Andric 
getPassName() const60d88c1a5aSDimitry Andric   StringRef getPassName() const override {
617d523365SDimitry Andric     return "AMDGPU Annotate Kernel Features";
627d523365SDimitry Andric   }
637d523365SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const647d523365SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
657d523365SDimitry Andric     AU.setPreservesAll();
66b40b48b8SDimitry Andric     CallGraphSCCPass::getAnalysisUsage(AU);
677d523365SDimitry Andric   }
683ca95b02SDimitry Andric 
69*b5893f02SDimitry Andric   static bool visitConstantExpr(const ConstantExpr *CE);
703ca95b02SDimitry Andric   static bool visitConstantExprsRecursively(
713ca95b02SDimitry Andric     const Constant *EntryC,
72*b5893f02SDimitry Andric     SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
737d523365SDimitry Andric };
747d523365SDimitry Andric 
752cab237bSDimitry Andric } // end anonymous namespace
767d523365SDimitry Andric 
777d523365SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0;
787d523365SDimitry Andric 
797d523365SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
807d523365SDimitry Andric 
813ca95b02SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
827d523365SDimitry Andric                 "Add AMDGPU function attributes", false, false)
837d523365SDimitry Andric 
847d523365SDimitry Andric 
853ca95b02SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
castRequiresQueuePtr(unsigned SrcAS)86*b5893f02SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
87*b5893f02SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
883ca95b02SDimitry Andric }
893ca95b02SDimitry Andric 
castRequiresQueuePtr(const AddrSpaceCastInst * ASC)90*b5893f02SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
91*b5893f02SDimitry Andric   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
923ca95b02SDimitry Andric }
933ca95b02SDimitry Andric 
visitConstantExpr(const ConstantExpr * CE)94*b5893f02SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
953ca95b02SDimitry Andric   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
963ca95b02SDimitry Andric     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
97*b5893f02SDimitry Andric     return castRequiresQueuePtr(SrcAS);
983ca95b02SDimitry Andric   }
993ca95b02SDimitry Andric 
1003ca95b02SDimitry Andric   return false;
1013ca95b02SDimitry Andric }
1023ca95b02SDimitry Andric 
visitConstantExprsRecursively(const Constant * EntryC,SmallPtrSet<const Constant *,8> & ConstantExprVisited)1033ca95b02SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1043ca95b02SDimitry Andric   const Constant *EntryC,
105*b5893f02SDimitry Andric   SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
1063ca95b02SDimitry Andric 
1073ca95b02SDimitry Andric   if (!ConstantExprVisited.insert(EntryC).second)
1083ca95b02SDimitry Andric     return false;
1093ca95b02SDimitry Andric 
1103ca95b02SDimitry Andric   SmallVector<const Constant *, 16> Stack;
1113ca95b02SDimitry Andric   Stack.push_back(EntryC);
1123ca95b02SDimitry Andric 
1133ca95b02SDimitry Andric   while (!Stack.empty()) {
1143ca95b02SDimitry Andric     const Constant *C = Stack.pop_back_val();
1153ca95b02SDimitry Andric 
1163ca95b02SDimitry Andric     // Check this constant expression.
1173ca95b02SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
118*b5893f02SDimitry Andric       if (visitConstantExpr(CE))
1193ca95b02SDimitry Andric         return true;
1203ca95b02SDimitry Andric     }
1213ca95b02SDimitry Andric 
1223ca95b02SDimitry Andric     // Visit all sub-expressions.
1233ca95b02SDimitry Andric     for (const Use &U : C->operands()) {
1243ca95b02SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
1253ca95b02SDimitry Andric       if (!OpC)
1263ca95b02SDimitry Andric         continue;
1273ca95b02SDimitry Andric 
1283ca95b02SDimitry Andric       if (!ConstantExprVisited.insert(OpC).second)
1293ca95b02SDimitry Andric         continue;
1303ca95b02SDimitry Andric 
1313ca95b02SDimitry Andric       Stack.push_back(OpC);
1323ca95b02SDimitry Andric     }
1333ca95b02SDimitry Andric   }
1343ca95b02SDimitry Andric 
1353ca95b02SDimitry Andric   return false;
1363ca95b02SDimitry Andric }
1373ca95b02SDimitry Andric 
138b40b48b8SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always
139b40b48b8SDimitry Andric // initialized.
140b40b48b8SDimitry Andric //
141b40b48b8SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup
142b40b48b8SDimitry Andric // size is 1 for y/z.
intrinsicToAttrName(Intrinsic::ID ID,bool & NonKernelOnly,bool & IsQueuePtr)143b40b48b8SDimitry Andric static StringRef intrinsicToAttrName(Intrinsic::ID ID,
144b40b48b8SDimitry Andric                                      bool &NonKernelOnly,
145b40b48b8SDimitry Andric                                      bool &IsQueuePtr) {
146b40b48b8SDimitry Andric   switch (ID) {
147b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
148b40b48b8SDimitry Andric     NonKernelOnly = true;
149b40b48b8SDimitry Andric     return "amdgpu-work-item-id-x";
150b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
151b40b48b8SDimitry Andric     NonKernelOnly = true;
152b40b48b8SDimitry Andric     return "amdgpu-work-group-id-x";
153b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
154b40b48b8SDimitry Andric   case Intrinsic::r600_read_tidig_y:
155b40b48b8SDimitry Andric     return "amdgpu-work-item-id-y";
156b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
157b40b48b8SDimitry Andric   case Intrinsic::r600_read_tidig_z:
158b40b48b8SDimitry Andric     return "amdgpu-work-item-id-z";
159b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
160b40b48b8SDimitry Andric   case Intrinsic::r600_read_tgid_y:
161b40b48b8SDimitry Andric     return "amdgpu-work-group-id-y";
162b40b48b8SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
163b40b48b8SDimitry Andric   case Intrinsic::r600_read_tgid_z:
164b40b48b8SDimitry Andric     return "amdgpu-work-group-id-z";
165b40b48b8SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
166b40b48b8SDimitry Andric     return "amdgpu-dispatch-ptr";
167b40b48b8SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
168b40b48b8SDimitry Andric     return "amdgpu-dispatch-id";
169b40b48b8SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr:
170b40b48b8SDimitry Andric     return "amdgpu-kernarg-segment-ptr";
1712cab237bSDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
1722cab237bSDimitry Andric     return "amdgpu-implicitarg-ptr";
173b40b48b8SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
174b40b48b8SDimitry Andric   case Intrinsic::trap:
175b40b48b8SDimitry Andric   case Intrinsic::debugtrap:
176b40b48b8SDimitry Andric     IsQueuePtr = true;
177b40b48b8SDimitry Andric     return "amdgpu-queue-ptr";
178b40b48b8SDimitry Andric   default:
179b40b48b8SDimitry Andric     return "";
180b40b48b8SDimitry Andric   }
181b40b48b8SDimitry Andric }
182b40b48b8SDimitry Andric 
handleAttr(Function & Parent,const Function & Callee,StringRef Name)183b40b48b8SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
184b40b48b8SDimitry Andric                        StringRef Name) {
185b40b48b8SDimitry Andric   if (Callee.hasFnAttribute(Name)) {
186b40b48b8SDimitry Andric     Parent.addFnAttr(Name);
187b40b48b8SDimitry Andric     return true;
188b40b48b8SDimitry Andric   }
189b40b48b8SDimitry Andric 
190b40b48b8SDimitry Andric   return false;
191b40b48b8SDimitry Andric }
192b40b48b8SDimitry Andric 
copyFeaturesToFunction(Function & Parent,const Function & Callee,bool & NeedQueuePtr)193b40b48b8SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
194b40b48b8SDimitry Andric                                    bool &NeedQueuePtr) {
195b40b48b8SDimitry Andric   // X ids unnecessarily propagated to kernels.
196b40b48b8SDimitry Andric   static const StringRef AttrNames[] = {
197b40b48b8SDimitry Andric     { "amdgpu-work-item-id-x" },
198b40b48b8SDimitry Andric     { "amdgpu-work-item-id-y" },
199b40b48b8SDimitry Andric     { "amdgpu-work-item-id-z" },
200b40b48b8SDimitry Andric     { "amdgpu-work-group-id-x" },
201b40b48b8SDimitry Andric     { "amdgpu-work-group-id-y" },
202b40b48b8SDimitry Andric     { "amdgpu-work-group-id-z" },
203b40b48b8SDimitry Andric     { "amdgpu-dispatch-ptr" },
204b40b48b8SDimitry Andric     { "amdgpu-dispatch-id" },
2052cab237bSDimitry Andric     { "amdgpu-kernarg-segment-ptr" },
2062cab237bSDimitry Andric     { "amdgpu-implicitarg-ptr" }
207b40b48b8SDimitry Andric   };
208b40b48b8SDimitry Andric 
209b40b48b8SDimitry Andric   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
210b40b48b8SDimitry Andric     NeedQueuePtr = true;
211b40b48b8SDimitry Andric 
212b40b48b8SDimitry Andric   for (StringRef AttrName : AttrNames)
213b40b48b8SDimitry Andric     handleAttr(Parent, Callee, AttrName);
214b40b48b8SDimitry Andric }
215b40b48b8SDimitry Andric 
addFeatureAttributes(Function & F)216b40b48b8SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
2174ba319b5SDimitry Andric   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
218b40b48b8SDimitry Andric   bool HasFlat = ST.hasFlatAddressSpace();
219b40b48b8SDimitry Andric   bool HasApertureRegs = ST.hasApertureRegs();
2203ca95b02SDimitry Andric   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
2213ca95b02SDimitry Andric 
222b40b48b8SDimitry Andric   bool Changed = false;
223b40b48b8SDimitry Andric   bool NeedQueuePtr = false;
224b40b48b8SDimitry Andric   bool HaveCall = false;
225b40b48b8SDimitry Andric   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
226b40b48b8SDimitry Andric 
227b40b48b8SDimitry Andric   for (BasicBlock &BB : F) {
228b40b48b8SDimitry Andric     for (Instruction &I : BB) {
229b40b48b8SDimitry Andric       CallSite CS(&I);
230b40b48b8SDimitry Andric       if (CS) {
231b40b48b8SDimitry Andric         Function *Callee = CS.getCalledFunction();
232b40b48b8SDimitry Andric 
233b40b48b8SDimitry Andric         // TODO: Do something with indirect calls.
234b40b48b8SDimitry Andric         if (!Callee) {
235b40b48b8SDimitry Andric           if (!CS.isInlineAsm())
236b40b48b8SDimitry Andric             HaveCall = true;
237b40b48b8SDimitry Andric           continue;
238b40b48b8SDimitry Andric         }
239b40b48b8SDimitry Andric 
240b40b48b8SDimitry Andric         Intrinsic::ID IID = Callee->getIntrinsicID();
241b40b48b8SDimitry Andric         if (IID == Intrinsic::not_intrinsic) {
242b40b48b8SDimitry Andric           HaveCall = true;
243b40b48b8SDimitry Andric           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
244b40b48b8SDimitry Andric           Changed = true;
245b40b48b8SDimitry Andric         } else {
246b40b48b8SDimitry Andric           bool NonKernelOnly = false;
247b40b48b8SDimitry Andric           StringRef AttrName = intrinsicToAttrName(IID,
248b40b48b8SDimitry Andric                                                    NonKernelOnly, NeedQueuePtr);
249b40b48b8SDimitry Andric           if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
250b40b48b8SDimitry Andric             F.addFnAttr(AttrName);
251b40b48b8SDimitry Andric             Changed = true;
252b40b48b8SDimitry Andric           }
253b40b48b8SDimitry Andric         }
254b40b48b8SDimitry Andric       }
255b40b48b8SDimitry Andric 
256b40b48b8SDimitry Andric       if (NeedQueuePtr || HasApertureRegs)
257b40b48b8SDimitry Andric         continue;
258b40b48b8SDimitry Andric 
2593ca95b02SDimitry Andric       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
260*b5893f02SDimitry Andric         if (castRequiresQueuePtr(ASC)) {
261b40b48b8SDimitry Andric           NeedQueuePtr = true;
262b40b48b8SDimitry Andric           continue;
263b40b48b8SDimitry Andric         }
2643ca95b02SDimitry Andric       }
2653ca95b02SDimitry Andric 
2663ca95b02SDimitry Andric       for (const Use &U : I.operands()) {
2673ca95b02SDimitry Andric         const auto *OpC = dyn_cast<Constant>(U);
2683ca95b02SDimitry Andric         if (!OpC)
2693ca95b02SDimitry Andric           continue;
2703ca95b02SDimitry Andric 
271*b5893f02SDimitry Andric         if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
272b40b48b8SDimitry Andric           NeedQueuePtr = true;
273b40b48b8SDimitry Andric           break;
274b40b48b8SDimitry Andric         }
2753ca95b02SDimitry Andric       }
2763ca95b02SDimitry Andric     }
2773ca95b02SDimitry Andric   }
2783ca95b02SDimitry Andric 
279b40b48b8SDimitry Andric   if (NeedQueuePtr) {
280b40b48b8SDimitry Andric     F.addFnAttr("amdgpu-queue-ptr");
281b40b48b8SDimitry Andric     Changed = true;
282b40b48b8SDimitry Andric   }
283b40b48b8SDimitry Andric 
284b40b48b8SDimitry Andric   // TODO: We could refine this to captured pointers that could possibly be
285b40b48b8SDimitry Andric   // accessed by flat instructions. For now this is mostly a poor way of
286b40b48b8SDimitry Andric   // estimating whether there are calls before argument lowering.
287b40b48b8SDimitry Andric   if (HasFlat && !IsFunc && HaveCall) {
288b40b48b8SDimitry Andric     F.addFnAttr("amdgpu-flat-scratch");
289b40b48b8SDimitry Andric     Changed = true;
290b40b48b8SDimitry Andric   }
291b40b48b8SDimitry Andric 
292b40b48b8SDimitry Andric   return Changed;
293b40b48b8SDimitry Andric }
294b40b48b8SDimitry Andric 
runOnSCC(CallGraphSCC & SCC)295b40b48b8SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
296b40b48b8SDimitry Andric   Module &M = SCC.getCallGraph().getModule();
297b40b48b8SDimitry Andric   Triple TT(M.getTargetTriple());
298b40b48b8SDimitry Andric 
299b40b48b8SDimitry Andric   bool Changed = false;
300b40b48b8SDimitry Andric   for (CallGraphNode *I : SCC) {
301b40b48b8SDimitry Andric     Function *F = I->getFunction();
302b40b48b8SDimitry Andric     if (!F || F->isDeclaration())
303b40b48b8SDimitry Andric       continue;
304b40b48b8SDimitry Andric 
305b40b48b8SDimitry Andric     Changed |= addFeatureAttributes(*F);
306b40b48b8SDimitry Andric   }
307b40b48b8SDimitry Andric 
308b40b48b8SDimitry Andric   return Changed;
309b40b48b8SDimitry Andric }
310b40b48b8SDimitry Andric 
doInitialization(CallGraph & CG)311b40b48b8SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
312b40b48b8SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
313b40b48b8SDimitry Andric   if (!TPC)
314b40b48b8SDimitry Andric     report_fatal_error("TargetMachine is required");
315b40b48b8SDimitry Andric 
316b40b48b8SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
3173ca95b02SDimitry Andric   return false;
3183ca95b02SDimitry Andric }
3193ca95b02SDimitry Andric 
createAMDGPUAnnotateKernelFeaturesPass()320b40b48b8SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
321d8866befSDimitry Andric   return new AMDGPUAnnotateKernelFeatures();
3227d523365SDimitry Andric }
323