//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass propagates the uniform-work-group-size attribute from
/// kernels to leaf functions when possible. It also adds additional attributes
/// that serve as hints for later ABI lowering optimizations.
//
//===----------------------------------------------------------------------===//
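
// For illustration (hypothetical IR, not part of the upstream comments): a
// kernel such as
//
//   define amdgpu_kernel void @k() {
//     %buf = alloca i32, addrspace(5)
//     call void @external_fn()
//     ret void
//   }
//
// contains both a stack object and a call to a non-intrinsic function, so
// addFeatureAttributes() below would mark it with the "amdgpu-stack-objects"
// and "amdgpu-calls" function attributes.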

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;

  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace
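// Legacy pass-manager boilerplate: the address of ID uniquely identifies the
// pass, the external AMDGPUAnnotateKernelFeaturesID handle lets other code
// refer to it, and INITIALIZE_PASS registers it under the
// "amdgpu-annotate-kernel-features" name defined by DEBUG_TYPE above.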
char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  bool HaveStackObjects = false;
  bool Changed = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // Note the occurrence of an indirect call.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;

          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          Changed = true;
        }
      }
    }
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }

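  // Any alloca in the function counts as a stack object; presumably later
  // frame/scratch lowering keys off this attribute.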
  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  bool Changed = false;

  for (CallGraphNode *I : SCC) {
    Function *F = I->getFunction();
    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
      continue;
    // Add feature attributes
    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
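
// Illustrative use (a sketch, not taken from this file): the pass is created
// through the factory above, e.g. with the legacy pass manager:
//
//   llvm::legacy::PassManager PM;
//   PM.add(llvm::createAMDGPUAnnotateKernelFeaturesPass());
//   PM.run(M);
//
// In-tree, the AMDGPU target's pass configuration is expected to add it this
// way during codegen pipeline setup.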