1*0f94d2b3SJay Foad //===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===// 2*0f94d2b3SJay Foad // 3*0f94d2b3SJay Foad // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0f94d2b3SJay Foad // See https://llvm.org/LICENSE.txt for license information. 5*0f94d2b3SJay Foad // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0f94d2b3SJay Foad // 7*0f94d2b3SJay Foad //===----------------------------------------------------------------------===// 8*0f94d2b3SJay Foad // 9*0f94d2b3SJay Foad /// \file 10*0f94d2b3SJay Foad /// Insert S_SENDMSG instructions to release vgprs on GFX11+. 11*0f94d2b3SJay Foad // 12*0f94d2b3SJay Foad //===----------------------------------------------------------------------===// 13*0f94d2b3SJay Foad 14*0f94d2b3SJay Foad #include "AMDGPU.h" 15*0f94d2b3SJay Foad #include "AMDGPUSubtarget.h" 16*0f94d2b3SJay Foad #include "GCNSubtarget.h" 17*0f94d2b3SJay Foad #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 18*0f94d2b3SJay Foad #include "SIDefines.h" 19*0f94d2b3SJay Foad #include "llvm/ADT/STLExtras.h" 20*0f94d2b3SJay Foad #include "llvm/CodeGen/MachineBasicBlock.h" 21*0f94d2b3SJay Foad #include "llvm/CodeGen/MachineOperand.h" 22*0f94d2b3SJay Foad using namespace llvm; 23*0f94d2b3SJay Foad 24*0f94d2b3SJay Foad #define DEBUG_TYPE "release-vgprs" 25*0f94d2b3SJay Foad 26*0f94d2b3SJay Foad namespace { 27*0f94d2b3SJay Foad 28*0f94d2b3SJay Foad class AMDGPUReleaseVGPRs : public MachineFunctionPass { 29*0f94d2b3SJay Foad public: 30*0f94d2b3SJay Foad static char ID; 31*0f94d2b3SJay Foad 32*0f94d2b3SJay Foad const SIInstrInfo *SII; 33*0f94d2b3SJay Foad const SIRegisterInfo *TRI; 34*0f94d2b3SJay Foad AMDGPUReleaseVGPRs()35*0f94d2b3SJay Foad AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {} 36*0f94d2b3SJay Foad getAnalysisUsage(AnalysisUsage & AU) const37*0f94d2b3SJay Foad void getAnalysisUsage(AnalysisUsage &AU) const override { 38*0f94d2b3SJay Foad AU.setPreservesAll(); 39*0f94d2b3SJay Foad MachineFunctionPass::getAnalysisUsage(AU); 40*0f94d2b3SJay Foad } 41*0f94d2b3SJay Foad 42*0f94d2b3SJay Foad // Used to cache the result of isLastInstructionVMEMStore for each block 43*0f94d2b3SJay Foad using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>; 44*0f94d2b3SJay Foad BlockVMEMStoreType BlockVMEMStore; 45*0f94d2b3SJay Foad 46*0f94d2b3SJay Foad // Return true if the last instruction referencing a vgpr in this MBB 47*0f94d2b3SJay Foad // is a VMEM store, otherwise return false. 48*0f94d2b3SJay Foad // Visit previous basic blocks to find this last instruction if needed. 49*0f94d2b3SJay Foad // Because this pass is late in the pipeline, it is expected that the 50*0f94d2b3SJay Foad // last vgpr use will likely be one of vmem store, ds, exp. 51*0f94d2b3SJay Foad // Loads and others vgpr operations would have been 52*0f94d2b3SJay Foad // deleted by this point, except for complex control flow involving loops. 53*0f94d2b3SJay Foad // This is why we are just testing the type of instructions rather 54*0f94d2b3SJay Foad // than the operands. isLastVGPRUseVMEMStore(MachineBasicBlock & MBB)55*0f94d2b3SJay Foad bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) { 56*0f94d2b3SJay Foad // Use the cache to break infinite loop and save some time. Initialize to 57*0f94d2b3SJay Foad // false in case we have a cycle. 58*0f94d2b3SJay Foad BlockVMEMStoreType::iterator It; 59*0f94d2b3SJay Foad bool Inserted; 60*0f94d2b3SJay Foad std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false}); 61*0f94d2b3SJay Foad bool &CacheEntry = It->second; 62*0f94d2b3SJay Foad if (!Inserted) 63*0f94d2b3SJay Foad return CacheEntry; 64*0f94d2b3SJay Foad 65*0f94d2b3SJay Foad for (auto &MI : reverse(MBB.instrs())) { 66*0f94d2b3SJay Foad // If it's a VMEM store, a vgpr will be used, return true. 67*0f94d2b3SJay Foad if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore()) 68*0f94d2b3SJay Foad return CacheEntry = true; 69*0f94d2b3SJay Foad 70*0f94d2b3SJay Foad // If it's referencing a VGPR but is not a VMEM store, return false. 71*0f94d2b3SJay Foad if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) || 72*0f94d2b3SJay Foad SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) || 73*0f94d2b3SJay Foad SIInstrInfo::isVALU(MI)) 74*0f94d2b3SJay Foad return CacheEntry = false; 75*0f94d2b3SJay Foad } 76*0f94d2b3SJay Foad 77*0f94d2b3SJay Foad // Recursive call into parent blocks. Look into predecessors if there is no 78*0f94d2b3SJay Foad // vgpr used in this block. 79*0f94d2b3SJay Foad return CacheEntry = llvm::any_of(MBB.predecessors(), 80*0f94d2b3SJay Foad [this](MachineBasicBlock *Parent) { 81*0f94d2b3SJay Foad return isLastVGPRUseVMEMStore(*Parent); 82*0f94d2b3SJay Foad }); 83*0f94d2b3SJay Foad } 84*0f94d2b3SJay Foad runOnMachineBasicBlock(MachineBasicBlock & MBB)85*0f94d2b3SJay Foad bool runOnMachineBasicBlock(MachineBasicBlock &MBB) { 86*0f94d2b3SJay Foad 87*0f94d2b3SJay Foad bool Changed = false; 88*0f94d2b3SJay Foad 89*0f94d2b3SJay Foad for (auto &MI : MBB.terminators()) { 90*0f94d2b3SJay Foad // Look for S_ENDPGM instructions 91*0f94d2b3SJay Foad if (MI.getOpcode() == AMDGPU::S_ENDPGM || 92*0f94d2b3SJay Foad MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { 93*0f94d2b3SJay Foad // If the last instruction using a VGPR in the block is a VMEM store, 94*0f94d2b3SJay Foad // release VGPRs. The VGPRs release will be placed just before ending 95*0f94d2b3SJay Foad // the program 96*0f94d2b3SJay Foad if (isLastVGPRUseVMEMStore(MBB)) { 97*0f94d2b3SJay Foad BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG)) 98*0f94d2b3SJay Foad .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus); 99*0f94d2b3SJay Foad Changed = true; 100*0f94d2b3SJay Foad } 101*0f94d2b3SJay Foad } 102*0f94d2b3SJay Foad } 103*0f94d2b3SJay Foad 104*0f94d2b3SJay Foad return Changed; 105*0f94d2b3SJay Foad } 106*0f94d2b3SJay Foad runOnMachineFunction(MachineFunction & MF)107*0f94d2b3SJay Foad bool runOnMachineFunction(MachineFunction &MF) override { 108*0f94d2b3SJay Foad Function &F = MF.getFunction(); 109*0f94d2b3SJay Foad if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 110*0f94d2b3SJay Foad return false; 111*0f94d2b3SJay Foad 112*0f94d2b3SJay Foad // This pass only runs on GFX11+ 113*0f94d2b3SJay Foad const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 114*0f94d2b3SJay Foad if (ST.getGeneration() < AMDGPUSubtarget::GFX11) 115*0f94d2b3SJay Foad return false; 116*0f94d2b3SJay Foad 117*0f94d2b3SJay Foad LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName() 118*0f94d2b3SJay Foad << "\n"); 119*0f94d2b3SJay Foad 120*0f94d2b3SJay Foad SII = ST.getInstrInfo(); 121*0f94d2b3SJay Foad TRI = ST.getRegisterInfo(); 122*0f94d2b3SJay Foad 123*0f94d2b3SJay Foad bool Changed = false; 124*0f94d2b3SJay Foad for (auto &MBB : MF) { 125*0f94d2b3SJay Foad Changed |= runOnMachineBasicBlock(MBB); 126*0f94d2b3SJay Foad } 127*0f94d2b3SJay Foad 128*0f94d2b3SJay Foad BlockVMEMStore.clear(); 129*0f94d2b3SJay Foad 130*0f94d2b3SJay Foad return Changed; 131*0f94d2b3SJay Foad } 132*0f94d2b3SJay Foad }; 133*0f94d2b3SJay Foad 134*0f94d2b3SJay Foad } // namespace 135*0f94d2b3SJay Foad 136*0f94d2b3SJay Foad char AMDGPUReleaseVGPRs::ID = 0; 137*0f94d2b3SJay Foad 138*0f94d2b3SJay Foad char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID; 139*0f94d2b3SJay Foad 140*0f94d2b3SJay Foad INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false) 141