1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Pass to pre-allocated WWM registers 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "GCNSubtarget.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "SIMachineFunctionInfo.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/CodeGen/LiveIntervals.h" 20 #include "llvm/CodeGen/LiveRegMatrix.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/VirtRegMap.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs" 30 31 namespace { 32 33 class SIPreAllocateWWMRegs : public MachineFunctionPass { 34 private: 35 const SIInstrInfo *TII; 36 const SIRegisterInfo *TRI; 37 MachineRegisterInfo *MRI; 38 LiveIntervals *LIS; 39 LiveRegMatrix *Matrix; 40 VirtRegMap *VRM; 41 RegisterClassInfo RegClassInfo; 42 43 std::vector<unsigned> RegsToRewrite; 44 #ifndef NDEBUG 45 void printWWMInfo(const MachineInstr &MI); 46 #endif 47 48 public: 49 static char ID; 50 51 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { 52 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); 53 } 54 55 bool runOnMachineFunction(MachineFunction &MF) override; 56 57 void getAnalysisUsage(AnalysisUsage &AU) const override { 58 AU.addRequired<LiveIntervals>(); 59 AU.addPreserved<LiveIntervals>(); 60 AU.addRequired<VirtRegMap>(); 61 AU.addRequired<LiveRegMatrix>(); 62 AU.addPreserved<SlotIndexes>(); 63 AU.setPreservesCFG(); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 67 private: 68 bool processDef(MachineOperand &MO); 69 void rewriteRegs(MachineFunction &MF); 70 }; 71 72 } // End anonymous namespace. 73 74 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, 75 "SI Pre-allocate WWM Registers", false, false) 76 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 77 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 78 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 79 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, 80 "SI Pre-allocate WWM Registers", false, false) 81 82 char SIPreAllocateWWMRegs::ID = 0; 83 84 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; 85 86 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { 87 return new SIPreAllocateWWMRegs(); 88 } 89 90 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { 91 if (!MO.isReg()) 92 return false; 93 94 Register Reg = MO.getReg(); 95 if (Reg.isPhysical()) 96 return false; 97 98 if (!TRI->isVGPR(*MRI, Reg)) 99 return false; 100 101 if (VRM->hasPhys(Reg)) 102 return false; 103 104 LiveInterval &LI = LIS->getInterval(Reg); 105 106 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { 107 if (!MRI->isPhysRegUsed(PhysReg) && 108 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { 109 Matrix->assign(LI, PhysReg); 110 assert(PhysReg != 0); 111 RegsToRewrite.push_back(Reg); 112 return true; 113 } 114 } 115 116 llvm_unreachable("physreg not found for WWM expression"); 117 return false; 118 } 119 120 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { 121 for (MachineBasicBlock &MBB : MF) { 122 for (MachineInstr &MI : MBB) { 123 for (MachineOperand &MO : MI.operands()) { 124 if (!MO.isReg()) 125 continue; 126 127 const Register VirtReg = MO.getReg(); 128 if (VirtReg.isPhysical()) 129 continue; 130 131 if (!VRM->hasPhys(VirtReg)) 132 continue; 133 134 Register PhysReg = VRM->getPhys(VirtReg); 135 const unsigned SubReg = MO.getSubReg(); 136 if (SubReg != 0) { 137 PhysReg = TRI->getSubReg(PhysReg, SubReg); 138 MO.setSubReg(0); 139 } 140 141 MO.setReg(PhysReg); 142 MO.setIsRenamable(false); 143 } 144 } 145 } 146 147 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 148 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 149 150 for (unsigned Reg : RegsToRewrite) { 151 LIS->removeInterval(Reg); 152 153 const Register PhysReg = VRM->getPhys(Reg); 154 assert(PhysReg != 0); 155 156 // Check if PhysReg is already reserved 157 if (!MFI->WWMReservedRegs.count(PhysReg)) { 158 Optional<int> FI; 159 if (!MFI->isEntryFunction()) { 160 // Create a stack object for a possible spill in the function prologue. 161 // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes. 162 const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg); 163 FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC), 164 TRI->getSpillAlign(*RC)); 165 } 166 MFI->reserveWWMRegister(PhysReg, FI); 167 } 168 } 169 170 RegsToRewrite.clear(); 171 172 // Update the set of reserved registers to include WWM ones. 173 MRI->freezeReservedRegs(MF); 174 } 175 176 #ifndef NDEBUG 177 LLVM_DUMP_METHOD void 178 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { 179 180 unsigned Opc = MI.getOpcode(); 181 182 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) { 183 dbgs() << "Entering "; 184 } else { 185 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM); 186 dbgs() << "Exiting "; 187 } 188 189 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { 190 dbgs() << "Strict WWM "; 191 } else { 192 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); 193 dbgs() << "Strict WQM "; 194 } 195 196 dbgs() << "region: " << MI; 197 } 198 199 #endif 200 201 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { 202 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); 203 204 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 205 206 TII = ST.getInstrInfo(); 207 TRI = &TII->getRegisterInfo(); 208 MRI = &MF.getRegInfo(); 209 210 LIS = &getAnalysis<LiveIntervals>(); 211 Matrix = &getAnalysis<LiveRegMatrix>(); 212 VRM = &getAnalysis<VirtRegMap>(); 213 214 RegClassInfo.runOnMachineFunction(MF); 215 216 bool RegsAssigned = false; 217 218 // We use a reverse post-order traversal of the control-flow graph to 219 // guarantee that we visit definitions in dominance order. Since WWM 220 // expressions are guaranteed to never involve phi nodes, and we can only 221 // escape WWM through the special WWM instruction, this means that this is a 222 // perfect elimination order, so we can never do any better. 223 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 224 225 for (MachineBasicBlock *MBB : RPOT) { 226 bool InWWM = false; 227 for (MachineInstr &MI : *MBB) { 228 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || 229 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) 230 RegsAssigned |= processDef(MI.getOperand(0)); 231 232 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || 233 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) { 234 LLVM_DEBUG(printWWMInfo(MI)); 235 InWWM = true; 236 continue; 237 } 238 239 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || 240 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) { 241 LLVM_DEBUG(printWWMInfo(MI)); 242 InWWM = false; 243 } 244 245 if (!InWWM) 246 continue; 247 248 LLVM_DEBUG(dbgs() << "Processing " << MI); 249 250 for (MachineOperand &DefOpnd : MI.defs()) { 251 RegsAssigned |= processDef(DefOpnd); 252 } 253 } 254 } 255 256 if (!RegsAssigned) 257 return false; 258 259 rewriteRegs(MF); 260 return true; 261 } 262