//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass propagates the uniform-work-group-size attribute from
/// kernels to leaf functions when possible. It also adds attributes that hint
/// later ABI lowering optimizations.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  SmallVector<CallGraphNode *, 8> NodeList;

  bool processUniformWorkGroupAttribute();
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
  bool addFeatureAttributes(Function &F);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)

bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
  bool Changed = false;

  // The SCC traversal is bottom-up (callees before callers), so walk NodeList
  // in reverse to propagate the attribute from kernels down toward leaves.
  for (auto *Node : reverse(NodeList)) {
    Function *Caller = Node->getFunction();

    for (auto I : *Node) {
      Function *Callee = std::get<1>(I)->getFunction();
      if (Callee)
        Changed |= propagateUniformWorkGroupAttribute(*Caller, *Callee);
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {

  // Check for an externally defined function.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr("uniform-work-group-size", "false");
    if (!Caller.hasFnAttribute("uniform-work-group-size"))
      Caller.addFnAttr("uniform-work-group-size", "false");

    return true;
  }
  // Check if the Caller has the attribute.
  if (Caller.hasFnAttribute("uniform-work-group-size")) {
    // Check if the value of the attribute is true.
    if (Caller.getFnAttribute("uniform-work-group-size")
            .getValueAsString()
            .equals("true")) {
      // Propagate the attribute to the Callee, if it does not have it.
      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
        Callee.addFnAttr("uniform-work-group-size", "true");
        return true;
      }
    } else {
      Callee.addFnAttr("uniform-work-group-size", "false");
      return true;
    }
  } else {
    // If the attribute is absent, set it to false on both caller and callee.
    Caller.addFnAttr("uniform-work-group-size", "false");
    Callee.addFnAttr("uniform-work-group-size", "false");
    return true;
  }
  return false;
}

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  bool HaveStackObjects = false;
  bool Changed = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // Note the occurrence of an indirect call.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;

          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          Changed = true;
        }
      }
    }
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }

  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  bool Changed = false;

  for (CallGraphNode *I : SCC) {
    // Build a list of CallGraphNodes, ordered from most uses to least. When a
    // node with no references (typically a kernel entry point) is reached,
    // propagate the uniform-work-group-size attribute through the accumulated
    // list and start over.
    if (I->getNumReferences())
      NodeList.push_back(I);
    else {
      processUniformWorkGroupAttribute();
      NodeList.clear();
    }

    Function *F = I->getFunction();
    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
      continue;
    // Add feature attributes.
    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}