1*0f94d2b3SJay Foad //===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===//
2*0f94d2b3SJay Foad //
3*0f94d2b3SJay Foad // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0f94d2b3SJay Foad // See https://llvm.org/LICENSE.txt for license information.
5*0f94d2b3SJay Foad // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0f94d2b3SJay Foad //
7*0f94d2b3SJay Foad //===----------------------------------------------------------------------===//
8*0f94d2b3SJay Foad //
9*0f94d2b3SJay Foad /// \file
10*0f94d2b3SJay Foad /// Insert S_SENDMSG instructions to release vgprs on GFX11+.
11*0f94d2b3SJay Foad //
12*0f94d2b3SJay Foad //===----------------------------------------------------------------------===//
13*0f94d2b3SJay Foad 
14*0f94d2b3SJay Foad #include "AMDGPU.h"
15*0f94d2b3SJay Foad #include "AMDGPUSubtarget.h"
16*0f94d2b3SJay Foad #include "GCNSubtarget.h"
17*0f94d2b3SJay Foad #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18*0f94d2b3SJay Foad #include "SIDefines.h"
19*0f94d2b3SJay Foad #include "llvm/ADT/STLExtras.h"
20*0f94d2b3SJay Foad #include "llvm/CodeGen/MachineBasicBlock.h"
21*0f94d2b3SJay Foad #include "llvm/CodeGen/MachineOperand.h"
22*0f94d2b3SJay Foad using namespace llvm;
23*0f94d2b3SJay Foad 
24*0f94d2b3SJay Foad #define DEBUG_TYPE "release-vgprs"
25*0f94d2b3SJay Foad 
26*0f94d2b3SJay Foad namespace {
27*0f94d2b3SJay Foad 
28*0f94d2b3SJay Foad class AMDGPUReleaseVGPRs : public MachineFunctionPass {
29*0f94d2b3SJay Foad public:
30*0f94d2b3SJay Foad   static char ID;
31*0f94d2b3SJay Foad 
32*0f94d2b3SJay Foad   const SIInstrInfo *SII;
33*0f94d2b3SJay Foad   const SIRegisterInfo *TRI;
34*0f94d2b3SJay Foad 
AMDGPUReleaseVGPRs()35*0f94d2b3SJay Foad   AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {}
36*0f94d2b3SJay Foad 
getAnalysisUsage(AnalysisUsage & AU) const37*0f94d2b3SJay Foad   void getAnalysisUsage(AnalysisUsage &AU) const override {
38*0f94d2b3SJay Foad     AU.setPreservesAll();
39*0f94d2b3SJay Foad     MachineFunctionPass::getAnalysisUsage(AU);
40*0f94d2b3SJay Foad   }
41*0f94d2b3SJay Foad 
42*0f94d2b3SJay Foad   // Used to cache the result of isLastInstructionVMEMStore for each block
43*0f94d2b3SJay Foad   using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>;
44*0f94d2b3SJay Foad   BlockVMEMStoreType BlockVMEMStore;
45*0f94d2b3SJay Foad 
46*0f94d2b3SJay Foad   // Return true if the last instruction referencing a vgpr in this MBB
47*0f94d2b3SJay Foad   // is a VMEM store, otherwise return false.
48*0f94d2b3SJay Foad   // Visit previous basic blocks to find this last instruction if needed.
49*0f94d2b3SJay Foad   // Because this pass is late in the pipeline, it is expected that the
50*0f94d2b3SJay Foad   // last vgpr use will likely be one of vmem store, ds, exp.
51*0f94d2b3SJay Foad   // Loads and others vgpr operations would have been
52*0f94d2b3SJay Foad   // deleted by this point, except for complex control flow involving loops.
53*0f94d2b3SJay Foad   // This is why we are just testing the type of instructions rather
54*0f94d2b3SJay Foad   // than the operands.
isLastVGPRUseVMEMStore(MachineBasicBlock & MBB)55*0f94d2b3SJay Foad   bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) {
56*0f94d2b3SJay Foad     // Use the cache to break infinite loop and save some time. Initialize to
57*0f94d2b3SJay Foad     // false in case we have a cycle.
58*0f94d2b3SJay Foad     BlockVMEMStoreType::iterator It;
59*0f94d2b3SJay Foad     bool Inserted;
60*0f94d2b3SJay Foad     std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false});
61*0f94d2b3SJay Foad     bool &CacheEntry = It->second;
62*0f94d2b3SJay Foad     if (!Inserted)
63*0f94d2b3SJay Foad       return CacheEntry;
64*0f94d2b3SJay Foad 
65*0f94d2b3SJay Foad     for (auto &MI : reverse(MBB.instrs())) {
66*0f94d2b3SJay Foad       // If it's a VMEM store, a vgpr will be used, return true.
67*0f94d2b3SJay Foad       if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore())
68*0f94d2b3SJay Foad         return CacheEntry = true;
69*0f94d2b3SJay Foad 
70*0f94d2b3SJay Foad       // If it's referencing a VGPR but is not a VMEM store, return false.
71*0f94d2b3SJay Foad       if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) ||
72*0f94d2b3SJay Foad           SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) ||
73*0f94d2b3SJay Foad           SIInstrInfo::isVALU(MI))
74*0f94d2b3SJay Foad         return CacheEntry = false;
75*0f94d2b3SJay Foad     }
76*0f94d2b3SJay Foad 
77*0f94d2b3SJay Foad     // Recursive call into parent blocks. Look into predecessors if there is no
78*0f94d2b3SJay Foad     // vgpr used in this block.
79*0f94d2b3SJay Foad     return CacheEntry = llvm::any_of(MBB.predecessors(),
80*0f94d2b3SJay Foad                                      [this](MachineBasicBlock *Parent) {
81*0f94d2b3SJay Foad                                        return isLastVGPRUseVMEMStore(*Parent);
82*0f94d2b3SJay Foad                                      });
83*0f94d2b3SJay Foad   }
84*0f94d2b3SJay Foad 
runOnMachineBasicBlock(MachineBasicBlock & MBB)85*0f94d2b3SJay Foad   bool runOnMachineBasicBlock(MachineBasicBlock &MBB) {
86*0f94d2b3SJay Foad 
87*0f94d2b3SJay Foad     bool Changed = false;
88*0f94d2b3SJay Foad 
89*0f94d2b3SJay Foad     for (auto &MI : MBB.terminators()) {
90*0f94d2b3SJay Foad       // Look for S_ENDPGM instructions
91*0f94d2b3SJay Foad       if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
92*0f94d2b3SJay Foad           MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
93*0f94d2b3SJay Foad         // If the last instruction using a VGPR in the block is a VMEM store,
94*0f94d2b3SJay Foad         // release VGPRs. The VGPRs release will be placed just before ending
95*0f94d2b3SJay Foad         // the program
96*0f94d2b3SJay Foad         if (isLastVGPRUseVMEMStore(MBB)) {
97*0f94d2b3SJay Foad           BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG))
98*0f94d2b3SJay Foad               .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
99*0f94d2b3SJay Foad           Changed = true;
100*0f94d2b3SJay Foad         }
101*0f94d2b3SJay Foad       }
102*0f94d2b3SJay Foad     }
103*0f94d2b3SJay Foad 
104*0f94d2b3SJay Foad     return Changed;
105*0f94d2b3SJay Foad   }
106*0f94d2b3SJay Foad 
runOnMachineFunction(MachineFunction & MF)107*0f94d2b3SJay Foad   bool runOnMachineFunction(MachineFunction &MF) override {
108*0f94d2b3SJay Foad     Function &F = MF.getFunction();
109*0f94d2b3SJay Foad     if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
110*0f94d2b3SJay Foad       return false;
111*0f94d2b3SJay Foad 
112*0f94d2b3SJay Foad     // This pass only runs on GFX11+
113*0f94d2b3SJay Foad     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
114*0f94d2b3SJay Foad     if (ST.getGeneration() < AMDGPUSubtarget::GFX11)
115*0f94d2b3SJay Foad       return false;
116*0f94d2b3SJay Foad 
117*0f94d2b3SJay Foad     LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName()
118*0f94d2b3SJay Foad                       << "\n");
119*0f94d2b3SJay Foad 
120*0f94d2b3SJay Foad     SII = ST.getInstrInfo();
121*0f94d2b3SJay Foad     TRI = ST.getRegisterInfo();
122*0f94d2b3SJay Foad 
123*0f94d2b3SJay Foad     bool Changed = false;
124*0f94d2b3SJay Foad     for (auto &MBB : MF) {
125*0f94d2b3SJay Foad       Changed |= runOnMachineBasicBlock(MBB);
126*0f94d2b3SJay Foad     }
127*0f94d2b3SJay Foad 
128*0f94d2b3SJay Foad     BlockVMEMStore.clear();
129*0f94d2b3SJay Foad 
130*0f94d2b3SJay Foad     return Changed;
131*0f94d2b3SJay Foad   }
132*0f94d2b3SJay Foad };
133*0f94d2b3SJay Foad 
134*0f94d2b3SJay Foad } // namespace
135*0f94d2b3SJay Foad 
136*0f94d2b3SJay Foad char AMDGPUReleaseVGPRs::ID = 0;
137*0f94d2b3SJay Foad 
138*0f94d2b3SJay Foad char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID;
139*0f94d2b3SJay Foad 
140*0f94d2b3SJay Foad INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false)
141