1 //===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert S_SENDMSG instructions to release vgprs on GFX11+.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "GCNSubtarget.h"
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "SIDefines.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineOperand.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "release-vgprs"
25 
26 namespace {
27 
28 class AMDGPUReleaseVGPRs : public MachineFunctionPass {
29 public:
30   static char ID;
31 
32   const SIInstrInfo *SII;
33   const SIRegisterInfo *TRI;
34 
AMDGPUReleaseVGPRs()35   AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {}
36 
getAnalysisUsage(AnalysisUsage & AU) const37   void getAnalysisUsage(AnalysisUsage &AU) const override {
38     AU.setPreservesAll();
39     MachineFunctionPass::getAnalysisUsage(AU);
40   }
41 
42   // Used to cache the result of isLastInstructionVMEMStore for each block
43   using BlockVMEMStoreType = DenseMap<MachineBasicBlock *, bool>;
44   BlockVMEMStoreType BlockVMEMStore;
45 
46   // Return true if the last instruction referencing a vgpr in this MBB
47   // is a VMEM store, otherwise return false.
48   // Visit previous basic blocks to find this last instruction if needed.
49   // Because this pass is late in the pipeline, it is expected that the
50   // last vgpr use will likely be one of vmem store, ds, exp.
51   // Loads and others vgpr operations would have been
52   // deleted by this point, except for complex control flow involving loops.
53   // This is why we are just testing the type of instructions rather
54   // than the operands.
isLastVGPRUseVMEMStore(MachineBasicBlock & MBB)55   bool isLastVGPRUseVMEMStore(MachineBasicBlock &MBB) {
56     // Use the cache to break infinite loop and save some time. Initialize to
57     // false in case we have a cycle.
58     BlockVMEMStoreType::iterator It;
59     bool Inserted;
60     std::tie(It, Inserted) = BlockVMEMStore.insert({&MBB, false});
61     bool &CacheEntry = It->second;
62     if (!Inserted)
63       return CacheEntry;
64 
65     for (auto &MI : reverse(MBB.instrs())) {
66       // If it's a VMEM store, a vgpr will be used, return true.
67       if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && MI.mayStore())
68         return CacheEntry = true;
69 
70       // If it's referencing a VGPR but is not a VMEM store, return false.
71       if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) ||
72           SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) ||
73           SIInstrInfo::isVALU(MI))
74         return CacheEntry = false;
75     }
76 
77     // Recursive call into parent blocks. Look into predecessors if there is no
78     // vgpr used in this block.
79     return CacheEntry = llvm::any_of(MBB.predecessors(),
80                                      [this](MachineBasicBlock *Parent) {
81                                        return isLastVGPRUseVMEMStore(*Parent);
82                                      });
83   }
84 
runOnMachineBasicBlock(MachineBasicBlock & MBB)85   bool runOnMachineBasicBlock(MachineBasicBlock &MBB) {
86 
87     bool Changed = false;
88 
89     for (auto &MI : MBB.terminators()) {
90       // Look for S_ENDPGM instructions
91       if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
92           MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
93         // If the last instruction using a VGPR in the block is a VMEM store,
94         // release VGPRs. The VGPRs release will be placed just before ending
95         // the program
96         if (isLastVGPRUseVMEMStore(MBB)) {
97           BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG))
98               .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
99           Changed = true;
100         }
101       }
102     }
103 
104     return Changed;
105   }
106 
runOnMachineFunction(MachineFunction & MF)107   bool runOnMachineFunction(MachineFunction &MF) override {
108     Function &F = MF.getFunction();
109     if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
110       return false;
111 
112     // This pass only runs on GFX11+
113     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
114     if (ST.getGeneration() < AMDGPUSubtarget::GFX11)
115       return false;
116 
117     LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName()
118                       << "\n");
119 
120     SII = ST.getInstrInfo();
121     TRI = ST.getRegisterInfo();
122 
123     bool Changed = false;
124     for (auto &MBB : MF) {
125       Changed |= runOnMachineBasicBlock(MBB);
126     }
127 
128     BlockVMEMStore.clear();
129 
130     return Changed;
131   }
132 };
133 
134 } // namespace
135 
// Storage for the pass's static ID member declared in the class above.
char AMDGPUReleaseVGPRs::ID = 0;

// Expose the ID under a name in the llvm namespace; the pass class itself
// lives in an anonymous namespace and cannot be named from other files.
char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID;

// Register the pass under the DEBUG_TYPE name ("release-vgprs") with the
// description "Release VGPRs".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags -- confirm against INITIALIZE_PASS.
INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false)
141