1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass propagates the uniform-work-group-size attribute from
10 /// kernels to leaf functions when possible. It also adds additional attributes
11 /// to hint ABI lowering optimizations later.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "GCNSubtarget.h"
17 #include "llvm/Analysis/CallGraph.h"
18 #include "llvm/Analysis/CallGraphSCCPass.h"
19 #include "llvm/CodeGen/TargetPassConfig.h"
20 #include "llvm/IR/IntrinsicsAMDGPU.h"
21 #include "llvm/IR/IntrinsicsR600.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
25 
26 using namespace llvm;
27 
28 namespace {
/// CallGraphSCC pass that propagates the "uniform-work-group-size" function
/// attribute from callers to callees and annotates functions with
/// "amdgpu-calls" / "amdgpu-stack-objects" hints for later ABI lowering.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Target machine, captured in doInitialization(); not owned.
  const TargetMachine *TM = nullptr;
  // Call-graph nodes accumulated across runOnSCC() invocations until a node
  // with no remaining references is seen, at which point the list is
  // processed (in reverse) and cleared.
  SmallVector<CallGraphNode*, 8> NodeList;

  // Walk NodeList (callers before callees) and propagate the
  // uniform-work-group-size attribute along each call edge.
  bool processUniformWorkGroupAttribute();
  // Propagate uniform-work-group-size from Caller to Callee for one edge.
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
  // Add "amdgpu-calls" / "amdgpu-stack-objects" attributes based on F's body.
  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only adds attributes; invalidates no analyses.
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};
55 
56 } // end anonymous namespace
57 
// Unique pass-identity anchor; its address identifies the pass.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Exported ID so other code can reference/schedule this pass.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
64 
65 bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
66   bool Changed = false;
67 
68   for (auto *Node : reverse(NodeList)) {
69     Function *Caller = Node->getFunction();
70 
71     for (auto I : *Node) {
72       Function *Callee = std::get<1>(I)->getFunction();
73       if (Callee)
74         Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
75     }
76   }
77 
78   return Changed;
79 }
80 
81 bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
82        Function &Caller, Function &Callee) {
83 
84   // Check for externally defined function
85   if (!Callee.hasExactDefinition()) {
86     Callee.addFnAttr("uniform-work-group-size", "false");
87     if (!Caller.hasFnAttribute("uniform-work-group-size"))
88       Caller.addFnAttr("uniform-work-group-size", "false");
89 
90     return true;
91   }
92   // Check if the Caller has the attribute
93   if (Caller.hasFnAttribute("uniform-work-group-size")) {
94     // Check if the value of the attribute is true
95     if (Caller.getFnAttribute("uniform-work-group-size")
96         .getValueAsString().equals("true")) {
97       // Propagate the attribute to the Callee, if it does not have it
98       if (!Callee.hasFnAttribute("uniform-work-group-size")) {
99         Callee.addFnAttr("uniform-work-group-size", "true");
100         return true;
101       }
102     } else {
103       Callee.addFnAttr("uniform-work-group-size", "false");
104       return true;
105     }
106   } else {
107     // If the attribute is absent, set it as false
108     Caller.addFnAttr("uniform-work-group-size", "false");
109     Callee.addFnAttr("uniform-work-group-size", "false");
110     return true;
111   }
112   return false;
113 }
114 
115 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
116   bool HaveStackObjects = false;
117   bool Changed = false;
118   bool HaveCall = false;
119   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
120 
121   for (BasicBlock &BB : F) {
122     for (Instruction &I : BB) {
123       if (isa<AllocaInst>(I)) {
124         HaveStackObjects = true;
125         continue;
126       }
127 
128       if (auto *CB = dyn_cast<CallBase>(&I)) {
129         const Function *Callee =
130             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
131 
132         // Note the occurence of indirect call.
133         if (!Callee) {
134           if (!CB->isInlineAsm())
135             HaveCall = true;
136 
137           continue;
138         }
139 
140         Intrinsic::ID IID = Callee->getIntrinsicID();
141         if (IID == Intrinsic::not_intrinsic) {
142           HaveCall = true;
143           Changed = true;
144         }
145       }
146     }
147   }
148 
149   // TODO: We could refine this to captured pointers that could possibly be
150   // accessed by flat instructions. For now this is mostly a poor way of
151   // estimating whether there are calls before argument lowering.
152   if (!IsFunc && HaveCall) {
153     F.addFnAttr("amdgpu-calls");
154     Changed = true;
155   }
156 
157   if (HaveStackObjects) {
158     F.addFnAttr("amdgpu-stack-objects");
159     Changed = true;
160   }
161 
162   return Changed;
163 }
164 
165 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
166   bool Changed = false;
167 
168   for (CallGraphNode *I : SCC) {
169     // Build a list of CallGraphNodes from most number of uses to least
170     if (I->getNumReferences())
171       NodeList.push_back(I);
172     else {
173       processUniformWorkGroupAttribute();
174       NodeList.clear();
175     }
176 
177     Function *F = I->getFunction();
178     // Ignore functions with graphics calling conventions, these are currently
179     // not allowed to have kernel arguments.
180     if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
181       continue;
182     // Add feature attributes
183     Changed |= addFeatureAttributes(*F);
184   }
185 
186   return Changed;
187 }
188 
189 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
190   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
191   if (!TPC)
192     report_fatal_error("TargetMachine is required");
193 
194   TM = &TPC->getTM<TargetMachine>();
195   return false;
196 }
197 
198 Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
199   return new AMDGPUAnnotateKernelFeatures();
200 }
201