1fa6434beSEugene Zelenko //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
23931948bSMatt Arsenault //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63931948bSMatt Arsenault //
73931948bSMatt Arsenault //===----------------------------------------------------------------------===//
83931948bSMatt Arsenault //
/// \file This pass annotates AMDGPU functions with attributes that hint ABI
/// lowering optimizations later: "amdgpu-calls" on entry functions that
/// contain calls, and "amdgpu-stack-objects" on functions that contain allocas.
123931948bSMatt Arsenault //
133931948bSMatt Arsenault //===----------------------------------------------------------------------===//
143931948bSMatt Arsenault 
153931948bSMatt Arsenault #include "AMDGPU.h"
16560d7e04Sdfukalov #include "GCNSubtarget.h"
17fa6434beSEugene Zelenko #include "llvm/Analysis/CallGraph.h"
186b93046fSMatt Arsenault #include "llvm/Analysis/CallGraphSCCPass.h"
198b61764cSFrancis Visoiu Mistrih #include "llvm/CodeGen/TargetPassConfig.h"
206a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
216a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsR600.h"
22fa6434beSEugene Zelenko #include "llvm/Target/TargetMachine.h"
233931948bSMatt Arsenault 
243931948bSMatt Arsenault #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
253931948bSMatt Arsenault 
263931948bSMatt Arsenault using namespace llvm;
273931948bSMatt Arsenault 
283931948bSMatt Arsenault namespace {
296b93046fSMatt Arsenault class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
303931948bSMatt Arsenault private:
316b93046fSMatt Arsenault   const TargetMachine *TM = nullptr;
3299c14524SMatt Arsenault 
33722b8e0eSMatt Arsenault   bool addFeatureAttributes(Function &F);
346b93046fSMatt Arsenault 
353931948bSMatt Arsenault public:
363931948bSMatt Arsenault   static char ID;
373931948bSMatt Arsenault 
AMDGPUAnnotateKernelFeatures()386b93046fSMatt Arsenault   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
396b93046fSMatt Arsenault 
406b93046fSMatt Arsenault   bool doInitialization(CallGraph &CG) override;
416b93046fSMatt Arsenault   bool runOnSCC(CallGraphSCC &SCC) override;
42fa6434beSEugene Zelenko 
getPassName() const43117296c0SMehdi Amini   StringRef getPassName() const override {
443931948bSMatt Arsenault     return "AMDGPU Annotate Kernel Features";
453931948bSMatt Arsenault   }
463931948bSMatt Arsenault 
getAnalysisUsage(AnalysisUsage & AU) const473931948bSMatt Arsenault   void getAnalysisUsage(AnalysisUsage &AU) const override {
483931948bSMatt Arsenault     AU.setPreservesAll();
496b93046fSMatt Arsenault     CallGraphSCCPass::getAnalysisUsage(AU);
503931948bSMatt Arsenault   }
513931948bSMatt Arsenault };
523931948bSMatt Arsenault 
53fa6434beSEugene Zelenko } // end anonymous namespace
543931948bSMatt Arsenault 
553931948bSMatt Arsenault char AMDGPUAnnotateKernelFeatures::ID = 0;
563931948bSMatt Arsenault 
573931948bSMatt Arsenault char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
583931948bSMatt Arsenault 
5999c14524SMatt Arsenault INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
603931948bSMatt Arsenault                 "Add AMDGPU function attributes", false, false)
613931948bSMatt Arsenault 
addFeatureAttributes(Function & F)626b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
6321d2884aSMatt Arsenault   bool HaveStackObjects = false;
646b93046fSMatt Arsenault   bool Changed = false;
65254ad3deSMatt Arsenault   bool HaveCall = false;
66e15855d9SMatt Arsenault   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
676b93046fSMatt Arsenault 
686b93046fSMatt Arsenault   for (BasicBlock &BB : F) {
696b93046fSMatt Arsenault     for (Instruction &I : BB) {
7021d2884aSMatt Arsenault       if (isa<AllocaInst>(I)) {
7121d2884aSMatt Arsenault         HaveStackObjects = true;
7221d2884aSMatt Arsenault         continue;
7321d2884aSMatt Arsenault       }
7421d2884aSMatt Arsenault 
75447e2c30SMircea Trofin       if (auto *CB = dyn_cast<CallBase>(&I)) {
76447e2c30SMircea Trofin         const Function *Callee =
77a58b62b4SCraig Topper             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
786b93046fSMatt Arsenault 
79*dc6e8dfdSJacob Lambert         // Note the occurrence of indirect call.
80254ad3deSMatt Arsenault         if (!Callee) {
81722b8e0eSMatt Arsenault           if (!CB->isInlineAsm())
82254ad3deSMatt Arsenault             HaveCall = true;
83722b8e0eSMatt Arsenault 
846b93046fSMatt Arsenault           continue;
85254ad3deSMatt Arsenault         }
866b93046fSMatt Arsenault 
876b93046fSMatt Arsenault         Intrinsic::ID IID = Callee->getIntrinsicID();
886b93046fSMatt Arsenault         if (IID == Intrinsic::not_intrinsic) {
89254ad3deSMatt Arsenault           HaveCall = true;
906b93046fSMatt Arsenault           Changed = true;
916b93046fSMatt Arsenault         }
926b93046fSMatt Arsenault       }
936b93046fSMatt Arsenault     }
9499c14524SMatt Arsenault   }
953931948bSMatt Arsenault 
96254ad3deSMatt Arsenault   // TODO: We could refine this to captured pointers that could possibly be
97254ad3deSMatt Arsenault   // accessed by flat instructions. For now this is mostly a poor way of
98254ad3deSMatt Arsenault   // estimating whether there are calls before argument lowering.
99ccc6e780SMatt Arsenault   if (!IsFunc && HaveCall) {
100ccc6e780SMatt Arsenault     F.addFnAttr("amdgpu-calls");
101254ad3deSMatt Arsenault     Changed = true;
102254ad3deSMatt Arsenault   }
103254ad3deSMatt Arsenault 
10421d2884aSMatt Arsenault   if (HaveStackObjects) {
10521d2884aSMatt Arsenault     F.addFnAttr("amdgpu-stack-objects");
10621d2884aSMatt Arsenault     Changed = true;
10721d2884aSMatt Arsenault   }
10821d2884aSMatt Arsenault 
1096b93046fSMatt Arsenault   return Changed;
1106b93046fSMatt Arsenault }
1116b93046fSMatt Arsenault 
runOnSCC(CallGraphSCC & SCC)1126b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
1136b93046fSMatt Arsenault   bool Changed = false;
114c56d2afcSAakanksha Patil 
1156b93046fSMatt Arsenault   for (CallGraphNode *I : SCC) {
1166b93046fSMatt Arsenault     Function *F = I->getFunction();
117c0c8548bSSebastian Neubauer     // Ignore functions with graphics calling conventions, these are currently
118c0c8548bSSebastian Neubauer     // not allowed to have kernel arguments.
119c0c8548bSSebastian Neubauer     if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
12099c14524SMatt Arsenault       continue;
121c0c8548bSSebastian Neubauer     // Add feature attributes
1226b93046fSMatt Arsenault     Changed |= addFeatureAttributes(*F);
12399c14524SMatt Arsenault   }
1246b93046fSMatt Arsenault 
1253931948bSMatt Arsenault   return Changed;
1263931948bSMatt Arsenault }
1273931948bSMatt Arsenault 
doInitialization(CallGraph & CG)1286b93046fSMatt Arsenault bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
1296b93046fSMatt Arsenault   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1306b93046fSMatt Arsenault   if (!TPC)
1316b93046fSMatt Arsenault     report_fatal_error("TargetMachine is required");
1326b93046fSMatt Arsenault 
1336b93046fSMatt Arsenault   TM = &TPC->getTM<TargetMachine>();
1346b93046fSMatt Arsenault   return false;
1356b93046fSMatt Arsenault }
1366b93046fSMatt Arsenault 
createAMDGPUAnnotateKernelFeaturesPass()1376b93046fSMatt Arsenault Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
1388b61764cSFrancis Visoiu Mistrih   return new AMDGPUAnnotateKernelFeatures();
1393931948bSMatt Arsenault }
140