12cab237bSDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
27d523365SDimitry Andric //
37d523365SDimitry Andric // The LLVM Compiler Infrastructure
47d523365SDimitry Andric //
57d523365SDimitry Andric // This file is distributed under the University of Illinois Open Source
67d523365SDimitry Andric // License. See LICENSE.TXT for details.
77d523365SDimitry Andric //
87d523365SDimitry Andric //===----------------------------------------------------------------------===//
97d523365SDimitry Andric //
107d523365SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics
117d523365SDimitry Andric /// which will impact calling convention lowering.
127d523365SDimitry Andric //
137d523365SDimitry Andric //===----------------------------------------------------------------------===//
147d523365SDimitry Andric
157d523365SDimitry Andric #include "AMDGPU.h"
167a7e6055SDimitry Andric #include "AMDGPUSubtarget.h"
172cab237bSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
182cab237bSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
192cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
202cab237bSDimitry Andric #include "llvm/ADT/StringRef.h"
21d88c1a5aSDimitry Andric #include "llvm/ADT/Triple.h"
222cab237bSDimitry Andric #include "llvm/Analysis/CallGraph.h"
23b40b48b8SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
24d8866befSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
252cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
262cab237bSDimitry Andric #include "llvm/IR/Constant.h"
273ca95b02SDimitry Andric #include "llvm/IR/Constants.h"
282cab237bSDimitry Andric #include "llvm/IR/Function.h"
292cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
307d523365SDimitry Andric #include "llvm/IR/Instructions.h"
312cab237bSDimitry Andric #include "llvm/IR/Intrinsics.h"
327d523365SDimitry Andric #include "llvm/IR/Module.h"
332cab237bSDimitry Andric #include "llvm/IR/Type.h"
342cab237bSDimitry Andric #include "llvm/IR/Use.h"
352cab237bSDimitry Andric #include "llvm/Pass.h"
362cab237bSDimitry Andric #include "llvm/Support/Casting.h"
372cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
382cab237bSDimitry Andric #include "llvm/Target/TargetMachine.h"
397d523365SDimitry Andric
407d523365SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
417d523365SDimitry Andric
427d523365SDimitry Andric using namespace llvm;
437d523365SDimitry Andric
447d523365SDimitry Andric namespace {
457d523365SDimitry Andric
/// CallGraphSCCPass that adds "amdgpu-*" string function attributes to
/// functions that use intrinsics (or address-space casts) whose lowering
/// needs extra preloaded SGPR inputs (work item/group ids, dispatch ptr,
/// queue ptr, etc.). Running bottom-up over the call graph lets callee
/// attributes propagate to their callers.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Cached in doInitialization(); used to look up the per-function subtarget.
  const TargetMachine *TM = nullptr;

  // Annotates a single function; returns true if any attribute was added.
  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only adds attributes; preserves all analyses.
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  // Returns true if \p CE is an addrspacecast that requires the queue ptr.
  static bool visitConstantExpr(const ConstantExpr *CE);
  // Walks \p EntryC and its constant operands (deduplicated through
  // \p ConstantExprVisited); returns true if any requires the queue ptr.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};
747d523365SDimitry Andric
752cab237bSDimitry Andric } // end anonymous namespace
767d523365SDimitry Andric
// Pass identification; the address of ID is the unique pass token.
char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
837d523365SDimitry Andric
847d523365SDimitry Andric
853ca95b02SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
castRequiresQueuePtr(unsigned SrcAS)86*b5893f02SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
87*b5893f02SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
883ca95b02SDimitry Andric }
893ca95b02SDimitry Andric
castRequiresQueuePtr(const AddrSpaceCastInst * ASC)90*b5893f02SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
91*b5893f02SDimitry Andric return castRequiresQueuePtr(ASC->getSrcAddressSpace());
923ca95b02SDimitry Andric }
933ca95b02SDimitry Andric
visitConstantExpr(const ConstantExpr * CE)94*b5893f02SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
953ca95b02SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) {
963ca95b02SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
97*b5893f02SDimitry Andric return castRequiresQueuePtr(SrcAS);
983ca95b02SDimitry Andric }
993ca95b02SDimitry Andric
1003ca95b02SDimitry Andric return false;
1013ca95b02SDimitry Andric }
1023ca95b02SDimitry Andric
visitConstantExprsRecursively(const Constant * EntryC,SmallPtrSet<const Constant *,8> & ConstantExprVisited)1033ca95b02SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1043ca95b02SDimitry Andric const Constant *EntryC,
105*b5893f02SDimitry Andric SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
1063ca95b02SDimitry Andric
1073ca95b02SDimitry Andric if (!ConstantExprVisited.insert(EntryC).second)
1083ca95b02SDimitry Andric return false;
1093ca95b02SDimitry Andric
1103ca95b02SDimitry Andric SmallVector<const Constant *, 16> Stack;
1113ca95b02SDimitry Andric Stack.push_back(EntryC);
1123ca95b02SDimitry Andric
1133ca95b02SDimitry Andric while (!Stack.empty()) {
1143ca95b02SDimitry Andric const Constant *C = Stack.pop_back_val();
1153ca95b02SDimitry Andric
1163ca95b02SDimitry Andric // Check this constant expression.
1173ca95b02SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
118*b5893f02SDimitry Andric if (visitConstantExpr(CE))
1193ca95b02SDimitry Andric return true;
1203ca95b02SDimitry Andric }
1213ca95b02SDimitry Andric
1223ca95b02SDimitry Andric // Visit all sub-expressions.
1233ca95b02SDimitry Andric for (const Use &U : C->operands()) {
1243ca95b02SDimitry Andric const auto *OpC = dyn_cast<Constant>(U);
1253ca95b02SDimitry Andric if (!OpC)
1263ca95b02SDimitry Andric continue;
1273ca95b02SDimitry Andric
1283ca95b02SDimitry Andric if (!ConstantExprVisited.insert(OpC).second)
1293ca95b02SDimitry Andric continue;
1303ca95b02SDimitry Andric
1313ca95b02SDimitry Andric Stack.push_back(OpC);
1323ca95b02SDimitry Andric }
1333ca95b02SDimitry Andric }
1343ca95b02SDimitry Andric
1353ca95b02SDimitry Andric return false;
1363ca95b02SDimitry Andric }
1373ca95b02SDimitry Andric
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
//
// Maps an intrinsic ID to the "amdgpu-*" attribute name it implies, or "" if
// the intrinsic implies none. Out-params:
//   \p NonKernelOnly - set when the attribute is only meaningful on non-kernel
//                      functions (x ids are always available in kernels).
//   \p IsQueuePtr    - set when the intrinsic requires the queue ptr (trap and
//                      debugtrap also read it).
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
182b40b48b8SDimitry Andric
handleAttr(Function & Parent,const Function & Callee,StringRef Name)183b40b48b8SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
184b40b48b8SDimitry Andric StringRef Name) {
185b40b48b8SDimitry Andric if (Callee.hasFnAttribute(Name)) {
186b40b48b8SDimitry Andric Parent.addFnAttr(Name);
187b40b48b8SDimitry Andric return true;
188b40b48b8SDimitry Andric }
189b40b48b8SDimitry Andric
190b40b48b8SDimitry Andric return false;
191b40b48b8SDimitry Andric }
192b40b48b8SDimitry Andric
copyFeaturesToFunction(Function & Parent,const Function & Callee,bool & NeedQueuePtr)193b40b48b8SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
194b40b48b8SDimitry Andric bool &NeedQueuePtr) {
195b40b48b8SDimitry Andric // X ids unnecessarily propagated to kernels.
196b40b48b8SDimitry Andric static const StringRef AttrNames[] = {
197b40b48b8SDimitry Andric { "amdgpu-work-item-id-x" },
198b40b48b8SDimitry Andric { "amdgpu-work-item-id-y" },
199b40b48b8SDimitry Andric { "amdgpu-work-item-id-z" },
200b40b48b8SDimitry Andric { "amdgpu-work-group-id-x" },
201b40b48b8SDimitry Andric { "amdgpu-work-group-id-y" },
202b40b48b8SDimitry Andric { "amdgpu-work-group-id-z" },
203b40b48b8SDimitry Andric { "amdgpu-dispatch-ptr" },
204b40b48b8SDimitry Andric { "amdgpu-dispatch-id" },
2052cab237bSDimitry Andric { "amdgpu-kernarg-segment-ptr" },
2062cab237bSDimitry Andric { "amdgpu-implicitarg-ptr" }
207b40b48b8SDimitry Andric };
208b40b48b8SDimitry Andric
209b40b48b8SDimitry Andric if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
210b40b48b8SDimitry Andric NeedQueuePtr = true;
211b40b48b8SDimitry Andric
212b40b48b8SDimitry Andric for (StringRef AttrName : AttrNames)
213b40b48b8SDimitry Andric handleAttr(Parent, Callee, AttrName);
214b40b48b8SDimitry Andric }
215b40b48b8SDimitry Andric
// Scans every instruction of \p F and adds the "amdgpu-*" attributes implied
// by the intrinsics it calls, the functions it calls (propagating callee
// attributes), and any addrspacecasts from local/private that require the
// queue ptr. Returns true if any attribute was added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  // With aperture registers the queue ptr is not needed for flat casts.
  bool HasApertureRegs = ST.hasApertureRegs();
  // Shared across the whole function scan so each constant is walked once.
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  // Non-entry ("device") functions, as opposed to kernels/entry points.
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          // Indirect calls (but not inline asm) count as real calls for the
          // flat-scratch estimate below.
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // Ordinary call: pull the callee's attributes up into F.
          // NOTE(review): Changed is set even if no attribute actually
          // propagated — conservative, matches existing behavior.
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          // Intrinsic call: map it to its implied attribute, if any.
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      // Once the queue ptr is known to be needed (or never needed, because
      // aperture registers exist), skip the cast/constant inspection.
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Casts may also be hidden inside constant expressions used as
      // operands.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}
294b40b48b8SDimitry Andric
runOnSCC(CallGraphSCC & SCC)295b40b48b8SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
296b40b48b8SDimitry Andric Module &M = SCC.getCallGraph().getModule();
297b40b48b8SDimitry Andric Triple TT(M.getTargetTriple());
298b40b48b8SDimitry Andric
299b40b48b8SDimitry Andric bool Changed = false;
300b40b48b8SDimitry Andric for (CallGraphNode *I : SCC) {
301b40b48b8SDimitry Andric Function *F = I->getFunction();
302b40b48b8SDimitry Andric if (!F || F->isDeclaration())
303b40b48b8SDimitry Andric continue;
304b40b48b8SDimitry Andric
305b40b48b8SDimitry Andric Changed |= addFeatureAttributes(*F);
306b40b48b8SDimitry Andric }
307b40b48b8SDimitry Andric
308b40b48b8SDimitry Andric return Changed;
309b40b48b8SDimitry Andric }
310b40b48b8SDimitry Andric
doInitialization(CallGraph & CG)311b40b48b8SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
312b40b48b8SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
313b40b48b8SDimitry Andric if (!TPC)
314b40b48b8SDimitry Andric report_fatal_error("TargetMachine is required");
315b40b48b8SDimitry Andric
316b40b48b8SDimitry Andric TM = &TPC->getTM<TargetMachine>();
3173ca95b02SDimitry Andric return false;
3183ca95b02SDimitry Andric }
3193ca95b02SDimitry Andric
// Factory used by the AMDGPU target to schedule this pass in its pipeline.
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
323