1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIInstrInfo.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/RegisterScavenging.h" 30 #include "llvm/CodeGen/VirtRegMap.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Target/TargetMachine.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "si-lower-sgpr-spills" 37 38 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 39 40 namespace { 41 42 static cl::opt<bool> EnableSpillVGPRToAGPR( 43 "amdgpu-spill-vgpr-to-agpr", 44 cl::desc("Enable spilling VGPRs to AGPRs"), 45 cl::ReallyHidden, 46 cl::init(true)); 47 48 class SILowerSGPRSpills : public MachineFunctionPass { 49 private: 50 const SIRegisterInfo *TRI = nullptr; 51 const SIInstrInfo *TII = nullptr; 52 VirtRegMap *VRM = nullptr; 53 LiveIntervals *LIS = nullptr; 54 55 // Save and Restore blocks of the current function. Typically there is a 56 // single save block, unless Windows EH funclets are involved. 57 MBBVector SaveBlocks; 58 MBBVector RestoreBlocks; 59 60 public: 61 static char ID; 62 63 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 64 65 void calculateSaveRestoreBlocks(MachineFunction &MF); 66 bool spillCalleeSavedRegs(MachineFunction &MF); 67 68 bool runOnMachineFunction(MachineFunction &MF) override; 69 70 void getAnalysisUsage(AnalysisUsage &AU) const override { 71 AU.setPreservesAll(); 72 MachineFunctionPass::getAnalysisUsage(AU); 73 } 74 }; 75 76 } // end anonymous namespace 77 78 char SILowerSGPRSpills::ID = 0; 79 80 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 81 "SI lower SGPR spill instructions", false, false) 82 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 83 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 84 "SI lower SGPR spill instructions", false, false) 85 86 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 87 88 /// Insert restore code for the callee-saved registers used in the function. 89 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 90 ArrayRef<CalleeSavedInfo> CSI, 91 LiveIntervals *LIS) { 92 MachineFunction &MF = *SaveBlock.getParent(); 93 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 94 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 95 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 96 97 MachineBasicBlock::iterator I = SaveBlock.begin(); 98 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 99 for (const CalleeSavedInfo &CS : CSI) { 100 // Insert the spill to the stack frame. 101 unsigned Reg = CS.getReg(); 102 103 MachineInstrSpan MIS(I, &SaveBlock); 104 const TargetRegisterClass *RC = 105 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 106 107 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, 108 TRI); 109 110 if (LIS) { 111 assert(std::distance(MIS.begin(), I) == 1); 112 MachineInstr &Inst = *std::prev(I); 113 114 LIS->InsertMachineInstrInMaps(Inst); 115 LIS->removeAllRegUnitsForPhysReg(Reg); 116 } 117 } 118 } 119 } 120 121 /// Insert restore code for the callee-saved registers used in the function. 122 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 123 MutableArrayRef<CalleeSavedInfo> CSI, 124 LiveIntervals *LIS) { 125 MachineFunction &MF = *RestoreBlock.getParent(); 126 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 127 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 128 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 129 130 // Restore all registers immediately before the return and any 131 // terminators that precede it. 132 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 133 134 // FIXME: Just emit the readlane/writelane directly 135 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 136 for (const CalleeSavedInfo &CI : reverse(CSI)) { 137 unsigned Reg = CI.getReg(); 138 const TargetRegisterClass *RC = 139 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 140 141 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 142 assert(I != RestoreBlock.begin() && 143 "loadRegFromStackSlot didn't insert any code!"); 144 // Insert in reverse order. loadRegFromStackSlot can insert 145 // multiple instructions. 146 147 if (LIS) { 148 MachineInstr &Inst = *std::prev(I); 149 LIS->InsertMachineInstrInMaps(Inst); 150 LIS->removeAllRegUnitsForPhysReg(Reg); 151 } 152 } 153 } 154 } 155 156 /// Compute the sets of entry and return blocks for saving and restoring 157 /// callee-saved registers, and placing prolog and epilog code. 158 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 159 const MachineFrameInfo &MFI = MF.getFrameInfo(); 160 161 // Even when we do not change any CSR, we still want to insert the 162 // prologue and epilogue of the function. 163 // So set the save points for those. 164 165 // Use the points found by shrink-wrapping, if any. 166 if (MFI.getSavePoint()) { 167 SaveBlocks.push_back(MFI.getSavePoint()); 168 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 169 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 170 // If RestoreBlock does not have any successor and is not a return block 171 // then the end point is unreachable and we do not need to insert any 172 // epilogue. 173 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 174 RestoreBlocks.push_back(RestoreBlock); 175 return; 176 } 177 178 // Save refs to entry and return blocks. 179 SaveBlocks.push_back(&MF.front()); 180 for (MachineBasicBlock &MBB : MF) { 181 if (MBB.isEHFuncletEntry()) 182 SaveBlocks.push_back(&MBB); 183 if (MBB.isReturnBlock()) 184 RestoreBlocks.push_back(&MBB); 185 } 186 } 187 188 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 189 MachineRegisterInfo &MRI = MF.getRegInfo(); 190 const Function &F = MF.getFunction(); 191 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 192 const SIFrameLowering *TFI = ST.getFrameLowering(); 193 MachineFrameInfo &MFI = MF.getFrameInfo(); 194 RegScavenger *RS = nullptr; 195 196 // Determine which of the registers in the callee save list should be saved. 197 BitVector SavedRegs; 198 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 199 200 // Add the code to save and restore the callee saved registers. 201 if (!F.hasFnAttribute(Attribute::Naked)) { 202 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 203 // necessary for verifier liveness checks. 204 MFI.setCalleeSavedInfoValid(true); 205 206 std::vector<CalleeSavedInfo> CSI; 207 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 208 209 for (unsigned I = 0; CSRegs[I]; ++I) { 210 unsigned Reg = CSRegs[I]; 211 if (SavedRegs.test(Reg)) { 212 const TargetRegisterClass *RC = 213 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 214 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 215 TRI->getSpillAlign(*RC), true); 216 217 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 218 } 219 } 220 221 if (!CSI.empty()) { 222 for (MachineBasicBlock *SaveBlock : SaveBlocks) 223 insertCSRSaves(*SaveBlock, CSI, LIS); 224 225 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 226 insertCSRRestores(*RestoreBlock, CSI, LIS); 227 return true; 228 } 229 } 230 231 return false; 232 } 233 234 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills. 235 static bool lowerShiftReservedVGPR(MachineFunction &MF, 236 const GCNSubtarget &ST) { 237 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 238 const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill; 239 // Early out if pre-reservation of a VGPR for SGPR spilling is disabled. 240 if (!PreReservedVGPR) 241 return false; 242 243 // If there are no free lower VGPRs available, default to using the 244 // pre-reserved register instead. 245 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 246 Register LowestAvailableVGPR = 247 TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF); 248 if (!LowestAvailableVGPR) 249 LowestAvailableVGPR = PreReservedVGPR; 250 251 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 252 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 253 Optional<int> FI; 254 // Check if we are reserving a CSR. Create a stack object for a possible spill 255 // in the function prologue. 256 if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR)) 257 FI = FrameInfo.CreateSpillStackObject(4, Align(4)); 258 259 // Find saved info about the pre-reserved register. 260 const auto *ReservedVGPRInfoItr = 261 std::find_if(FuncInfo->getSGPRSpillVGPRs().begin(), 262 FuncInfo->getSGPRSpillVGPRs().end(), 263 [PreReservedVGPR](const auto &SpillRegInfo) { 264 return SpillRegInfo.VGPR == PreReservedVGPR; 265 }); 266 267 assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end()); 268 auto Index = 269 std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr); 270 271 FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index); 272 273 for (MachineBasicBlock &MBB : MF) { 274 MBB.addLiveIn(LowestAvailableVGPR); 275 MBB.sortUniqueLiveIns(); 276 } 277 278 return true; 279 } 280 281 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 282 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 283 TII = ST.getInstrInfo(); 284 TRI = &TII->getRegisterInfo(); 285 286 VRM = getAnalysisIfAvailable<VirtRegMap>(); 287 288 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 289 290 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 291 // does, but somewhat simpler. 292 calculateSaveRestoreBlocks(MF); 293 bool HasCSRs = spillCalleeSavedRegs(MF); 294 295 MachineFrameInfo &MFI = MF.getFrameInfo(); 296 if (!MFI.hasStackObjects() && !HasCSRs) { 297 SaveBlocks.clear(); 298 RestoreBlocks.clear(); 299 return false; 300 } 301 302 MachineRegisterInfo &MRI = MF.getRegInfo(); 303 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 304 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 305 && EnableSpillVGPRToAGPR; 306 307 bool MadeChange = false; 308 309 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); 310 std::unique_ptr<RegScavenger> RS; 311 312 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 313 // handled as SpilledToReg in regular PrologEpilogInserter. 314 if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || 315 SpillVGPRToAGPR) { 316 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 317 // are spilled to VGPRs, in which case we can eliminate the stack usage. 318 // 319 // This operates under the assumption that only other SGPR spills are users 320 // of the frame index. 321 322 lowerShiftReservedVGPR(MF, ST); 323 324 for (MachineBasicBlock &MBB : MF) { 325 MachineBasicBlock::iterator Next; 326 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 327 MachineInstr &MI = *I; 328 Next = std::next(I); 329 330 if (SpillToAGPR && TII->isVGPRSpill(MI)) { 331 // Try to eliminate stack used by VGPR spills before frame 332 // finalization. 333 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 334 AMDGPU::OpName::vaddr); 335 int FI = MI.getOperand(FIOp).getIndex(); 336 Register VReg = 337 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 338 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 339 TRI->isAGPR(MRI, VReg))) { 340 if (!RS) 341 RS.reset(new RegScavenger()); 342 343 // FIXME: change to enterBasicBlockEnd() 344 RS->enterBasicBlock(MBB); 345 TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get()); 346 continue; 347 } 348 } 349 350 if (!TII->isSGPRSpill(MI)) 351 continue; 352 353 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 354 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 355 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 356 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); 357 (void)Spilled; 358 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 359 } 360 } 361 } 362 363 for (MachineBasicBlock &MBB : MF) { 364 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 365 MBB.addLiveIn(SSpill.VGPR); 366 367 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 368 MBB.addLiveIn(Reg); 369 370 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 371 MBB.addLiveIn(Reg); 372 373 MBB.sortUniqueLiveIns(); 374 } 375 376 MadeChange = true; 377 } else if (FuncInfo->VGPRReservedForSGPRSpill) { 378 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); 379 } 380 381 SaveBlocks.clear(); 382 RestoreBlocks.clear(); 383 384 return MadeChange; 385 } 386