1 //===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Insert S_SENDMSG instructions to release vgprs on GFX11+. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPUSubtarget.h" 16 #include "GCNSubtarget.h" 17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 18 #include "SIDefines.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/CodeGen/MachineBasicBlock.h" 21 #include "llvm/CodeGen/MachineOperand.h" 22 using namespace llvm; 23 24 #define DEBUG_TYPE "release-vgprs" 25 26 namespace { 27 28 class AMDGPUReleaseVGPRs : public MachineFunctionPass { 29 public: 30 static char ID; 31 32 const SIInstrInfo *SII; 33 const SIRegisterInfo *TRI; 34 AMDGPUReleaseVGPRs()35 AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {} 36 getAnalysisUsage(AnalysisUsage & AU) const37 void getAnalysisUsage(AnalysisUsage &AU) const override { 38 AU.setPreservesAll(); 39 MachineFunctionPass::getAnalysisUsage(AU); 40 } 41 42 // Used to cache the result of isLastInstructionVMEMStore for each block 43 using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>; 44 BlockVMEMStoreType BlockVMEMStore; 45 46 // Return true if the last instruction referencing a vgpr in this MBB 47 // is a VMEM store, otherwise return false. 48 // Visit previous basic blocks to find this last instruction if needed. 49 // Because this pass is late in the pipeline, it is expected that the 50 // last vgpr use will likely be one of vmem store, ds, exp. 51 // Loads and others vgpr operations would have been 52 // deleted by this point, except for complex control flow involving loops. 53 // This is why we are just testing the type of instructions rather 54 // than the operands. isLastVGPRUseVMEMStore(MachineBasicBlock & MBB)55 bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) { 56 // Use the cache to break infinite loop and save some time. Initialize to 57 // false in case we have a cycle. 58 BlockVMEMStoreType::iterator It; 59 bool Inserted; 60 std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false}); 61 bool &CacheEntry = It->second; 62 if (!Inserted) 63 return CacheEntry; 64 65 for (auto &MI : reverse(MBB.instrs())) { 66 // If it's a VMEM store, a vgpr will be used, return true. 67 if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore()) 68 return CacheEntry = true; 69 70 // If it's referencing a VGPR but is not a VMEM store, return false. 71 if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) || 72 SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) || 73 SIInstrInfo::isVALU(MI)) 74 return CacheEntry = false; 75 } 76 77 // Recursive call into parent blocks. Look into predecessors if there is no 78 // vgpr used in this block. 79 return CacheEntry = llvm::any_of(MBB.predecessors(), 80 [this](MachineBasicBlock *Parent) { 81 return isLastVGPRUseVMEMStore(*Parent); 82 }); 83 } 84 runOnMachineBasicBlock(MachineBasicBlock & MBB)85 bool runOnMachineBasicBlock(MachineBasicBlock &MBB) { 86 87 bool Changed = false; 88 89 for (auto &MI : MBB.terminators()) { 90 // Look for S_ENDPGM instructions 91 if (MI.getOpcode() == AMDGPU::S_ENDPGM || 92 MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { 93 // If the last instruction using a VGPR in the block is a VMEM store, 94 // release VGPRs. The VGPRs release will be placed just before ending 95 // the program 96 if (isLastVGPRUseVMEMStore(MBB)) { 97 BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG)) 98 .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus); 99 Changed = true; 100 } 101 } 102 } 103 104 return Changed; 105 } 106 runOnMachineFunction(MachineFunction & MF)107 bool runOnMachineFunction(MachineFunction &MF) override { 108 Function &F = MF.getFunction(); 109 if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 110 return false; 111 112 // This pass only runs on GFX11+ 113 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 114 if (ST.getGeneration() < AMDGPUSubtarget::GFX11) 115 return false; 116 117 LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName() 118 << "\n"); 119 120 SII = ST.getInstrInfo(); 121 TRI = ST.getRegisterInfo(); 122 123 bool Changed = false; 124 for (auto &MBB : MF) { 125 Changed |= runOnMachineBasicBlock(MBB); 126 } 127 128 BlockVMEMStore.clear(); 129 130 return Changed; 131 } 132 }; 133 134 } // namespace 135 136 char AMDGPUReleaseVGPRs::ID = 0; 137 138 char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID; 139 140 INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false) 141