1 //===-- SILowerSGPRSPills.cpp ---------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all 10 // SGPR spills, so must insert CSR SGPR spills as well as expand them. 11 // 12 // This pass must never create new SGPR virtual registers. 13 // 14 // FIXME: Must stop RegScavenger spills in later passes. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/RegisterScavenging.h" 24 #include "llvm/InitializePasses.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "si-lower-sgpr-spills" 29 30 using MBBVector = SmallVector<MachineBasicBlock *, 4>; 31 32 namespace { 33 34 static cl::opt<bool> EnableSpillVGPRToAGPR( 35 "amdgpu-spill-vgpr-to-agpr", 36 cl::desc("Enable spilling VGPRs to AGPRs"), 37 cl::ReallyHidden, 38 cl::init(true)); 39 40 class SILowerSGPRSpills : public MachineFunctionPass { 41 private: 42 const SIRegisterInfo *TRI = nullptr; 43 const SIInstrInfo *TII = nullptr; 44 VirtRegMap *VRM = nullptr; 45 LiveIntervals *LIS = nullptr; 46 47 // Save and Restore blocks of the current function. Typically there is a 48 // single save block, unless Windows EH funclets are involved. 49 MBBVector SaveBlocks; 50 MBBVector RestoreBlocks; 51 52 public: 53 static char ID; 54 55 SILowerSGPRSpills() : MachineFunctionPass(ID) {} 56 57 void calculateSaveRestoreBlocks(MachineFunction &MF); 58 bool spillCalleeSavedRegs(MachineFunction &MF); 59 60 bool runOnMachineFunction(MachineFunction &MF) override; 61 62 void getAnalysisUsage(AnalysisUsage &AU) const override { 63 AU.setPreservesAll(); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 }; 67 68 } // end anonymous namespace 69 70 char SILowerSGPRSpills::ID = 0; 71 72 INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE, 73 "SI lower SGPR spill instructions", false, false) 74 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 75 INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE, 76 "SI lower SGPR spill instructions", false, false) 77 78 char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID; 79 80 /// Insert restore code for the callee-saved registers used in the function. 81 static void insertCSRSaves(MachineBasicBlock &SaveBlock, 82 ArrayRef<CalleeSavedInfo> CSI, 83 LiveIntervals *LIS) { 84 MachineFunction &MF = *SaveBlock.getParent(); 85 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 86 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 87 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 88 89 MachineBasicBlock::iterator I = SaveBlock.begin(); 90 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { 91 const MachineRegisterInfo &MRI = MF.getRegInfo(); 92 93 for (const CalleeSavedInfo &CS : CSI) { 94 // Insert the spill to the stack frame. 95 MCRegister Reg = CS.getReg(); 96 97 MachineInstrSpan MIS(I, &SaveBlock); 98 const TargetRegisterClass *RC = 99 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 100 101 // If this value was already livein, we probably have a direct use of the 102 // incoming register value, so don't kill at the spill point. This happens 103 // since we pass some special inputs (workgroup IDs) in the callee saved 104 // range. 105 const bool IsLiveIn = MRI.isLiveIn(Reg); 106 TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(), 107 RC, TRI); 108 109 if (LIS) { 110 assert(std::distance(MIS.begin(), I) == 1); 111 MachineInstr &Inst = *std::prev(I); 112 113 LIS->InsertMachineInstrInMaps(Inst); 114 LIS->removeAllRegUnitsForPhysReg(Reg); 115 } 116 } 117 } 118 } 119 120 /// Insert restore code for the callee-saved registers used in the function. 121 static void insertCSRRestores(MachineBasicBlock &RestoreBlock, 122 MutableArrayRef<CalleeSavedInfo> CSI, 123 LiveIntervals *LIS) { 124 MachineFunction &MF = *RestoreBlock.getParent(); 125 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); 126 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); 127 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 128 129 // Restore all registers immediately before the return and any 130 // terminators that precede it. 131 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); 132 133 // FIXME: Just emit the readlane/writelane directly 134 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { 135 for (const CalleeSavedInfo &CI : reverse(CSI)) { 136 unsigned Reg = CI.getReg(); 137 const TargetRegisterClass *RC = 138 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 139 140 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); 141 assert(I != RestoreBlock.begin() && 142 "loadRegFromStackSlot didn't insert any code!"); 143 // Insert in reverse order. loadRegFromStackSlot can insert 144 // multiple instructions. 145 146 if (LIS) { 147 MachineInstr &Inst = *std::prev(I); 148 LIS->InsertMachineInstrInMaps(Inst); 149 LIS->removeAllRegUnitsForPhysReg(Reg); 150 } 151 } 152 } 153 } 154 155 /// Compute the sets of entry and return blocks for saving and restoring 156 /// callee-saved registers, and placing prolog and epilog code. 157 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) { 158 const MachineFrameInfo &MFI = MF.getFrameInfo(); 159 160 // Even when we do not change any CSR, we still want to insert the 161 // prologue and epilogue of the function. 162 // So set the save points for those. 163 164 // Use the points found by shrink-wrapping, if any. 165 if (MFI.getSavePoint()) { 166 SaveBlocks.push_back(MFI.getSavePoint()); 167 assert(MFI.getRestorePoint() && "Both restore and save must be set"); 168 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 169 // If RestoreBlock does not have any successor and is not a return block 170 // then the end point is unreachable and we do not need to insert any 171 // epilogue. 172 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) 173 RestoreBlocks.push_back(RestoreBlock); 174 return; 175 } 176 177 // Save refs to entry and return blocks. 178 SaveBlocks.push_back(&MF.front()); 179 for (MachineBasicBlock &MBB : MF) { 180 if (MBB.isEHFuncletEntry()) 181 SaveBlocks.push_back(&MBB); 182 if (MBB.isReturnBlock()) 183 RestoreBlocks.push_back(&MBB); 184 } 185 } 186 187 // TODO: To support shrink wrapping, this would need to copy 188 // PrologEpilogInserter's updateLiveness. 189 static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) { 190 MachineBasicBlock &EntryBB = MF.front(); 191 192 for (const CalleeSavedInfo &CSIReg : CSI) 193 EntryBB.addLiveIn(CSIReg.getReg()); 194 EntryBB.sortUniqueLiveIns(); 195 } 196 197 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { 198 MachineRegisterInfo &MRI = MF.getRegInfo(); 199 const Function &F = MF.getFunction(); 200 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 201 const SIFrameLowering *TFI = ST.getFrameLowering(); 202 MachineFrameInfo &MFI = MF.getFrameInfo(); 203 RegScavenger *RS = nullptr; 204 205 // Determine which of the registers in the callee save list should be saved. 206 BitVector SavedRegs; 207 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS); 208 209 // Add the code to save and restore the callee saved registers. 210 if (!F.hasFnAttribute(Attribute::Naked)) { 211 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is 212 // necessary for verifier liveness checks. 213 MFI.setCalleeSavedInfoValid(true); 214 215 std::vector<CalleeSavedInfo> CSI; 216 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 217 218 for (unsigned I = 0; CSRegs[I]; ++I) { 219 MCRegister Reg = CSRegs[I]; 220 221 if (SavedRegs.test(Reg)) { 222 const TargetRegisterClass *RC = 223 TRI->getMinimalPhysRegClass(Reg, MVT::i32); 224 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), 225 TRI->getSpillAlign(*RC), true); 226 227 CSI.push_back(CalleeSavedInfo(Reg, JunkFI)); 228 } 229 } 230 231 if (!CSI.empty()) { 232 for (MachineBasicBlock *SaveBlock : SaveBlocks) 233 insertCSRSaves(*SaveBlock, CSI, LIS); 234 235 // Add live ins to save blocks. 236 assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented"); 237 updateLiveness(MF, CSI); 238 239 for (MachineBasicBlock *RestoreBlock : RestoreBlocks) 240 insertCSRRestores(*RestoreBlock, CSI, LIS); 241 return true; 242 } 243 } 244 245 return false; 246 } 247 248 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills. 249 static bool lowerShiftReservedVGPR(MachineFunction &MF, 250 const GCNSubtarget &ST) { 251 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 252 const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill; 253 // Early out if pre-reservation of a VGPR for SGPR spilling is disabled. 254 if (!PreReservedVGPR) 255 return false; 256 257 // If there are no free lower VGPRs available, default to using the 258 // pre-reserved register instead. 259 const SIRegisterInfo *TRI = ST.getRegisterInfo(); 260 Register LowestAvailableVGPR = 261 TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF); 262 if (!LowestAvailableVGPR) 263 LowestAvailableVGPR = PreReservedVGPR; 264 265 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs(); 266 MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 267 Optional<int> FI; 268 // Check if we are reserving a CSR. Create a stack object for a possible spill 269 // in the function prologue. 270 if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR)) 271 FI = FrameInfo.CreateSpillStackObject(4, Align(4)); 272 273 // Find saved info about the pre-reserved register. 274 const auto *ReservedVGPRInfoItr = 275 llvm::find_if(FuncInfo->getSGPRSpillVGPRs(), 276 [PreReservedVGPR](const auto &SpillRegInfo) { 277 return SpillRegInfo.VGPR == PreReservedVGPR; 278 }); 279 280 assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end()); 281 auto Index = 282 std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr); 283 284 FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index); 285 286 for (MachineBasicBlock &MBB : MF) { 287 assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR"); 288 MBB.addLiveIn(LowestAvailableVGPR); 289 MBB.sortUniqueLiveIns(); 290 } 291 292 return true; 293 } 294 295 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { 296 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 297 TII = ST.getInstrInfo(); 298 TRI = &TII->getRegisterInfo(); 299 300 VRM = getAnalysisIfAvailable<VirtRegMap>(); 301 302 assert(SaveBlocks.empty() && RestoreBlocks.empty()); 303 304 // First, expose any CSR SGPR spills. This is mostly the same as what PEI 305 // does, but somewhat simpler. 306 calculateSaveRestoreBlocks(MF); 307 bool HasCSRs = spillCalleeSavedRegs(MF); 308 309 MachineFrameInfo &MFI = MF.getFrameInfo(); 310 if (!MFI.hasStackObjects() && !HasCSRs) { 311 SaveBlocks.clear(); 312 RestoreBlocks.clear(); 313 return false; 314 } 315 316 MachineRegisterInfo &MRI = MF.getRegInfo(); 317 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 318 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() 319 && EnableSpillVGPRToAGPR; 320 321 bool MadeChange = false; 322 323 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts(); 324 std::unique_ptr<RegScavenger> RS; 325 326 bool NewReservedRegs = false; 327 328 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be 329 // handled as SpilledToReg in regular PrologEpilogInserter. 330 const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() && 331 (HasCSRs || FuncInfo->hasSpilledSGPRs()); 332 if (HasSGPRSpillToVGPR || SpillVGPRToAGPR) { 333 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs 334 // are spilled to VGPRs, in which case we can eliminate the stack usage. 335 // 336 // This operates under the assumption that only other SGPR spills are users 337 // of the frame index. 338 339 lowerShiftReservedVGPR(MF, ST); 340 341 for (MachineBasicBlock &MBB : MF) { 342 MachineBasicBlock::iterator Next; 343 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { 344 MachineInstr &MI = *I; 345 Next = std::next(I); 346 347 if (SpillToAGPR && TII->isVGPRSpill(MI)) { 348 // Try to eliminate stack used by VGPR spills before frame 349 // finalization. 350 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(), 351 AMDGPU::OpName::vaddr); 352 int FI = MI.getOperand(FIOp).getIndex(); 353 Register VReg = 354 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); 355 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, 356 TRI->isAGPR(MRI, VReg))) { 357 NewReservedRegs = true; 358 if (!RS) 359 RS.reset(new RegScavenger()); 360 361 // FIXME: change to enterBasicBlockEnd() 362 RS->enterBasicBlock(MBB); 363 TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get()); 364 continue; 365 } 366 } 367 368 if (!TII->isSGPRSpill(MI) || !TRI->spillSGPRToVGPR()) 369 continue; 370 371 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex(); 372 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill); 373 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) { 374 NewReservedRegs = true; 375 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); 376 (void)Spilled; 377 assert(Spilled && "failed to spill SGPR to VGPR when allocated"); 378 } 379 } 380 } 381 382 for (MachineBasicBlock &MBB : MF) { 383 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs()) 384 MBB.addLiveIn(SSpill.VGPR); 385 386 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) 387 MBB.addLiveIn(Reg); 388 389 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs()) 390 MBB.addLiveIn(Reg); 391 392 MBB.sortUniqueLiveIns(); 393 } 394 395 MadeChange = true; 396 } else if (FuncInfo->VGPRReservedForSGPRSpill) { 397 FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); 398 } 399 400 SaveBlocks.clear(); 401 RestoreBlocks.clear(); 402 403 // Updated the reserved registers with any VGPRs added for SGPR spills. 404 if (NewReservedRegs) 405 MRI.freezeReservedRegs(MF); 406 407 return MadeChange; 408 } 409