1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/VirtRegMap.h"
25 #include "llvm/InitializePasses.h"
26
27 using namespace llvm;
28
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30
31 namespace {
32
33 class SIPreAllocateWWMRegs : public MachineFunctionPass {
34 private:
35 const SIInstrInfo *TII;
36 const SIRegisterInfo *TRI;
37 MachineRegisterInfo *MRI;
38 LiveIntervals *LIS;
39 LiveRegMatrix *Matrix;
40 VirtRegMap *VRM;
41 RegisterClassInfo RegClassInfo;
42
43 std::vector<unsigned> RegsToRewrite;
44 #ifndef NDEBUG
45 void printWWMInfo(const MachineInstr &MI);
46 #endif
47
48 public:
49 static char ID;
50
SIPreAllocateWWMRegs()51 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
52 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
53 }
54
55 bool runOnMachineFunction(MachineFunction &MF) override;
56
getAnalysisUsage(AnalysisUsage & AU) const57 void getAnalysisUsage(AnalysisUsage &AU) const override {
58 AU.addRequired<LiveIntervals>();
59 AU.addPreserved<LiveIntervals>();
60 AU.addRequired<VirtRegMap>();
61 AU.addRequired<LiveRegMatrix>();
62 AU.addPreserved<SlotIndexes>();
63 AU.setPreservesCFG();
64 MachineFunctionPass::getAnalysisUsage(AU);
65 }
66
67 private:
68 bool processDef(MachineOperand &MO);
69 void rewriteRegs(MachineFunction &MF);
70 };
71
72 } // End anonymous namespace.
73
74 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
75 "SI Pre-allocate WWM Registers", false, false)
76 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
77 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
78 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
79 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
80 "SI Pre-allocate WWM Registers", false, false)
81
82 char SIPreAllocateWWMRegs::ID = 0;
83
84 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
85
createSIPreAllocateWWMRegsPass()86 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
87 return new SIPreAllocateWWMRegs();
88 }
89
processDef(MachineOperand & MO)90 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
91 Register Reg = MO.getReg();
92 if (Reg.isPhysical())
93 return false;
94
95 if (!TRI->isVGPR(*MRI, Reg))
96 return false;
97
98 if (VRM->hasPhys(Reg))
99 return false;
100
101 LiveInterval &LI = LIS->getInterval(Reg);
102
103 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
104 if (!MRI->isPhysRegUsed(PhysReg) &&
105 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
106 Matrix->assign(LI, PhysReg);
107 assert(PhysReg != 0);
108 RegsToRewrite.push_back(Reg);
109 return true;
110 }
111 }
112
113 llvm_unreachable("physreg not found for WWM expression");
114 }
115
rewriteRegs(MachineFunction & MF)116 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
117 for (MachineBasicBlock &MBB : MF) {
118 for (MachineInstr &MI : MBB) {
119 for (MachineOperand &MO : MI.operands()) {
120 if (!MO.isReg())
121 continue;
122
123 const Register VirtReg = MO.getReg();
124 if (VirtReg.isPhysical())
125 continue;
126
127 if (!VRM->hasPhys(VirtReg))
128 continue;
129
130 Register PhysReg = VRM->getPhys(VirtReg);
131 const unsigned SubReg = MO.getSubReg();
132 if (SubReg != 0) {
133 PhysReg = TRI->getSubReg(PhysReg, SubReg);
134 MO.setSubReg(0);
135 }
136
137 MO.setReg(PhysReg);
138 MO.setIsRenamable(false);
139 }
140 }
141 }
142
143 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
144
145 for (unsigned Reg : RegsToRewrite) {
146 LIS->removeInterval(Reg);
147
148 const Register PhysReg = VRM->getPhys(Reg);
149 assert(PhysReg != 0);
150
151 MFI->reserveWWMRegister(PhysReg);
152 }
153
154 RegsToRewrite.clear();
155
156 // Update the set of reserved registers to include WWM ones.
157 MRI->freezeReservedRegs(MF);
158 }
159
160 #ifndef NDEBUG
161 LLVM_DUMP_METHOD void
printWWMInfo(const MachineInstr & MI)162 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
163
164 unsigned Opc = MI.getOpcode();
165
166 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
167 dbgs() << "Entering ";
168 } else {
169 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
170 dbgs() << "Exiting ";
171 }
172
173 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
174 dbgs() << "Strict WWM ";
175 } else {
176 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
177 dbgs() << "Strict WQM ";
178 }
179
180 dbgs() << "region: " << MI;
181 }
182
183 #endif
184
runOnMachineFunction(MachineFunction & MF)185 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
186 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
187
188 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
189
190 TII = ST.getInstrInfo();
191 TRI = &TII->getRegisterInfo();
192 MRI = &MF.getRegInfo();
193
194 LIS = &getAnalysis<LiveIntervals>();
195 Matrix = &getAnalysis<LiveRegMatrix>();
196 VRM = &getAnalysis<VirtRegMap>();
197
198 RegClassInfo.runOnMachineFunction(MF);
199
200 bool RegsAssigned = false;
201
202 // We use a reverse post-order traversal of the control-flow graph to
203 // guarantee that we visit definitions in dominance order. Since WWM
204 // expressions are guaranteed to never involve phi nodes, and we can only
205 // escape WWM through the special WWM instruction, this means that this is a
206 // perfect elimination order, so we can never do any better.
207 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
208
209 for (MachineBasicBlock *MBB : RPOT) {
210 bool InWWM = false;
211 for (MachineInstr &MI : *MBB) {
212 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
213 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
214 RegsAssigned |= processDef(MI.getOperand(0));
215
216 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
217 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
218 LLVM_DEBUG(printWWMInfo(MI));
219 InWWM = true;
220 continue;
221 }
222
223 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
224 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
225 LLVM_DEBUG(printWWMInfo(MI));
226 InWWM = false;
227 }
228
229 if (!InWWM)
230 continue;
231
232 LLVM_DEBUG(dbgs() << "Processing " << MI);
233
234 for (MachineOperand &DefOpnd : MI.defs()) {
235 RegsAssigned |= processDef(DefOpnd);
236 }
237 }
238 }
239
240 if (!RegsAssigned)
241 return false;
242
243 rewriteRegs(MF);
244 return true;
245 }
246