1*0b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric /// \file This pass propagates the uniform-work-group-size attribute from 10*0b57cec5SDimitry Andric /// kernels to leaf functions when possible. It also adds additional attributes 11*0b57cec5SDimitry Andric /// to hint ABI lowering optimizations later. 12*0b57cec5SDimitry Andric // 13*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric #include "AMDGPU.h" 16*0b57cec5SDimitry Andric #include "GCNSubtarget.h" 17*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h" 18*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h" 19*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 20*0b57cec5SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 21*0b57cec5SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 22*0b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric using namespace llvm; 27*0b57cec5SDimitry Andric 28*0b57cec5SDimitry Andric namespace { 29*0b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 30*0b57cec5SDimitry Andric private: 31*0b57cec5SDimitry Andric const TargetMachine *TM = nullptr; 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric bool addFeatureAttributes(Function &F); 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric public: 36*0b57cec5SDimitry Andric static char ID; 37*0b57cec5SDimitry Andric AMDGPUAnnotateKernelFeatures()38*0b57cec5SDimitry Andric AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 39*0b57cec5SDimitry Andric 40*0b57cec5SDimitry Andric bool doInitialization(CallGraph &CG) override; 41*0b57cec5SDimitry Andric bool runOnSCC(CallGraphSCC &SCC) override; 42*0b57cec5SDimitry Andric getPassName() const43*0b57cec5SDimitry Andric StringRef getPassName() const override { 44*0b57cec5SDimitry Andric return "AMDGPU Annotate Kernel Features"; 45*0b57cec5SDimitry Andric } 46*0b57cec5SDimitry Andric getAnalysisUsage(AnalysisUsage & AU) const47*0b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 48*0b57cec5SDimitry Andric AU.setPreservesAll(); 49*0b57cec5SDimitry Andric CallGraphSCCPass::getAnalysisUsage(AU); 50*0b57cec5SDimitry Andric } 51*0b57cec5SDimitry Andric }; 52*0b57cec5SDimitry Andric 53*0b57cec5SDimitry Andric } // end anonymous namespace 54*0b57cec5SDimitry Andric 55*0b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0; 56*0b57cec5SDimitry Andric 57*0b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 60*0b57cec5SDimitry Andric "Add AMDGPU function attributes", false, false) 61*0b57cec5SDimitry Andric addFeatureAttributes(Function & F)62*0b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 63*0b57cec5SDimitry Andric bool HaveStackObjects = false; 64*0b57cec5SDimitry Andric bool Changed = false; 65*0b57cec5SDimitry Andric bool HaveCall = false; 66*0b57cec5SDimitry Andric bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 67*0b57cec5SDimitry Andric 68*0b57cec5SDimitry Andric for (BasicBlock &BB : F) { 69*0b57cec5SDimitry Andric for (Instruction &I : BB) { 70*0b57cec5SDimitry Andric if (isa<AllocaInst>(I)) { 71*0b57cec5SDimitry Andric HaveStackObjects = true; 72*0b57cec5SDimitry Andric continue; 73*0b57cec5SDimitry Andric } 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) { 76*0b57cec5SDimitry Andric const Function *Callee = 77*0b57cec5SDimitry Andric dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 78*0b57cec5SDimitry Andric 79*0b57cec5SDimitry Andric // Note the occurrence of indirect call. 80*0b57cec5SDimitry Andric if (!Callee) { 81*0b57cec5SDimitry Andric if (!CB->isInlineAsm()) 82*0b57cec5SDimitry Andric HaveCall = true; 83*0b57cec5SDimitry Andric 84*0b57cec5SDimitry Andric continue; 85*0b57cec5SDimitry Andric } 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 88*0b57cec5SDimitry Andric if (IID == Intrinsic::not_intrinsic) { 89*0b57cec5SDimitry Andric HaveCall = true; 90*0b57cec5SDimitry Andric Changed = true; 91*0b57cec5SDimitry Andric } 92*0b57cec5SDimitry Andric } 93*0b57cec5SDimitry Andric } 94*0b57cec5SDimitry Andric } 95*0b57cec5SDimitry Andric 96*0b57cec5SDimitry Andric // TODO: We could refine this to captured pointers that could possibly be 97*0b57cec5SDimitry Andric // accessed by flat instructions. For now this is mostly a poor way of 98*0b57cec5SDimitry Andric // estimating whether there are calls before argument lowering. 99*0b57cec5SDimitry Andric if (!IsFunc && HaveCall) { 100*0b57cec5SDimitry Andric F.addFnAttr("amdgpu-calls"); 101*0b57cec5SDimitry Andric Changed = true; 102*0b57cec5SDimitry Andric } 103*0b57cec5SDimitry Andric 104*0b57cec5SDimitry Andric if (HaveStackObjects) { 105*0b57cec5SDimitry Andric F.addFnAttr("amdgpu-stack-objects"); 106*0b57cec5SDimitry Andric Changed = true; 107*0b57cec5SDimitry Andric } 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric return Changed; 110*0b57cec5SDimitry Andric } 111*0b57cec5SDimitry Andric runOnSCC(CallGraphSCC & SCC)112*0b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 113*0b57cec5SDimitry Andric bool Changed = false; 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric for (CallGraphNode *I : SCC) { 116*0b57cec5SDimitry Andric Function *F = I->getFunction(); 117*0b57cec5SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 118*0b57cec5SDimitry Andric // not allowed to have kernel arguments. 119*0b57cec5SDimitry Andric if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv())) 120*0b57cec5SDimitry Andric continue; 121*0b57cec5SDimitry Andric // Add feature attributes 122*0b57cec5SDimitry Andric Changed |= addFeatureAttributes(*F); 123*0b57cec5SDimitry Andric } 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric return Changed; 126*0b57cec5SDimitry Andric } 127*0b57cec5SDimitry Andric doInitialization(CallGraph & CG)128*0b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 129*0b57cec5SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 130*0b57cec5SDimitry Andric if (!TPC) 131*0b57cec5SDimitry Andric report_fatal_error("TargetMachine is required"); 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 134*0b57cec5SDimitry Andric return false; 135*0b57cec5SDimitry Andric } 136*0b57cec5SDimitry Andric createAMDGPUAnnotateKernelFeaturesPass()137*0b57cec5SDimitry AndricPass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 138*0b57cec5SDimitry Andric return new AMDGPUAnnotateKernelFeatures(); 139*0b57cec5SDimitry Andric } 140*0b57cec5SDimitry Andric