//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/RegisterScavenging.h"
30 #include "llvm/CodeGen/VirtRegMap.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Target/TargetMachine.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "si-lower-sgpr-spills"
37 
// List of basic blocks with inline storage for four entries; this is the type
// of the pass's SaveBlocks and RestoreBlocks members.
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
39 
40 namespace {
41 
42 static cl::opt<bool> EnableSpillVGPRToAGPR(
43   "amdgpu-spill-vgpr-to-agpr",
44   cl::desc("Enable spilling VGPRs to AGPRs"),
45   cl::ReallyHidden,
46   cl::init(true));
47 
48 class SILowerSGPRSpills : public MachineFunctionPass {
49 private:
50   const SIRegisterInfo *TRI = nullptr;
51   const SIInstrInfo *TII = nullptr;
52   VirtRegMap *VRM = nullptr;
53   LiveIntervals *LIS = nullptr;
54 
55   // Save and Restore blocks of the current function. Typically there is a
56   // single save block, unless Windows EH funclets are involved.
57   MBBVector SaveBlocks;
58   MBBVector RestoreBlocks;
59 
60 public:
61   static char ID;
62 
63   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
64 
65   void calculateSaveRestoreBlocks(MachineFunction &MF);
66   bool spillCalleeSavedRegs(MachineFunction &MF);
67 
68   bool runOnMachineFunction(MachineFunction &MF) override;
69 
70   void getAnalysisUsage(AnalysisUsage &AU) const override {
71     AU.setPreservesAll();
72     MachineFunctionPass::getAnalysisUsage(AU);
73   }
74 };
75 
76 } // end anonymous namespace
77 
// Storage for the pass ID that the constructor hands to MachineFunctionPass.
char SILowerSGPRSpills::ID = 0;

// Register the pass under the DEBUG_TYPE name ("si-lower-sgpr-spills") and
// declare its initialization dependency on VirtRegMap.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Externally visible reference to the pass ID, declared in the llvm namespace.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
87 
88 /// Insert restore code for the callee-saved registers used in the function.
89 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
90                            ArrayRef<CalleeSavedInfo> CSI,
91                            LiveIntervals *LIS) {
92   MachineFunction &MF = *SaveBlock.getParent();
93   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
94   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
95   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
96 
97   MachineBasicBlock::iterator I = SaveBlock.begin();
98   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
99     for (const CalleeSavedInfo &CS : CSI) {
100       // Insert the spill to the stack frame.
101       unsigned Reg = CS.getReg();
102 
103       MachineInstrSpan MIS(I, &SaveBlock);
104       const TargetRegisterClass *RC =
105         TRI->getMinimalPhysRegClass(Reg, MVT::i32);
106 
107       TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
108                               TRI);
109 
110       if (LIS) {
111         assert(std::distance(MIS.begin(), I) == 1);
112         MachineInstr &Inst = *std::prev(I);
113 
114         LIS->InsertMachineInstrInMaps(Inst);
115         LIS->removeAllRegUnitsForPhysReg(Reg);
116       }
117     }
118   }
119 }
120 
121 /// Insert restore code for the callee-saved registers used in the function.
122 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
123                               MutableArrayRef<CalleeSavedInfo> CSI,
124                               LiveIntervals *LIS) {
125   MachineFunction &MF = *RestoreBlock.getParent();
126   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
127   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
128   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
129 
130   // Restore all registers immediately before the return and any
131   // terminators that precede it.
132   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
133 
134   // FIXME: Just emit the readlane/writelane directly
135   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
136     for (const CalleeSavedInfo &CI : reverse(CSI)) {
137       unsigned Reg = CI.getReg();
138       const TargetRegisterClass *RC =
139         TRI->getMinimalPhysRegClass(Reg, MVT::i32);
140 
141       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
142       assert(I != RestoreBlock.begin() &&
143              "loadRegFromStackSlot didn't insert any code!");
144       // Insert in reverse order.  loadRegFromStackSlot can insert
145       // multiple instructions.
146 
147       if (LIS) {
148         MachineInstr &Inst = *std::prev(I);
149         LIS->InsertMachineInstrInMaps(Inst);
150         LIS->removeAllRegUnitsForPhysReg(Reg);
151       }
152     }
153   }
154 }
155 
156 /// Compute the sets of entry and return blocks for saving and restoring
157 /// callee-saved registers, and placing prolog and epilog code.
158 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
159   const MachineFrameInfo &MFI = MF.getFrameInfo();
160 
161   // Even when we do not change any CSR, we still want to insert the
162   // prologue and epilogue of the function.
163   // So set the save points for those.
164 
165   // Use the points found by shrink-wrapping, if any.
166   if (MFI.getSavePoint()) {
167     SaveBlocks.push_back(MFI.getSavePoint());
168     assert(MFI.getRestorePoint() && "Both restore and save must be set");
169     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
170     // If RestoreBlock does not have any successor and is not a return block
171     // then the end point is unreachable and we do not need to insert any
172     // epilogue.
173     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
174       RestoreBlocks.push_back(RestoreBlock);
175     return;
176   }
177 
178   // Save refs to entry and return blocks.
179   SaveBlocks.push_back(&MF.front());
180   for (MachineBasicBlock &MBB : MF) {
181     if (MBB.isEHFuncletEntry())
182       SaveBlocks.push_back(&MBB);
183     if (MBB.isReturnBlock())
184       RestoreBlocks.push_back(&MBB);
185   }
186 }
187 
188 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
189   MachineRegisterInfo &MRI = MF.getRegInfo();
190   const Function &F = MF.getFunction();
191   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
192   const SIFrameLowering *TFI = ST.getFrameLowering();
193   MachineFrameInfo &MFI = MF.getFrameInfo();
194   RegScavenger *RS = nullptr;
195 
196   // Determine which of the registers in the callee save list should be saved.
197   BitVector SavedRegs;
198   TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
199 
200   // Add the code to save and restore the callee saved registers.
201   if (!F.hasFnAttribute(Attribute::Naked)) {
202     // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
203     // necessary for verifier liveness checks.
204     MFI.setCalleeSavedInfoValid(true);
205 
206     std::vector<CalleeSavedInfo> CSI;
207     const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
208 
209     for (unsigned I = 0; CSRegs[I]; ++I) {
210       unsigned Reg = CSRegs[I];
211       if (SavedRegs.test(Reg)) {
212         const TargetRegisterClass *RC =
213           TRI->getMinimalPhysRegClass(Reg, MVT::i32);
214         int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
215                                            TRI->getSpillAlign(*RC), true);
216 
217         CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
218       }
219     }
220 
221     if (!CSI.empty()) {
222       for (MachineBasicBlock *SaveBlock : SaveBlocks)
223         insertCSRSaves(*SaveBlock, CSI, LIS);
224 
225       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
226         insertCSRRestores(*RestoreBlock, CSI, LIS);
227       return true;
228     }
229   }
230 
231   return false;
232 }
233 
234 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
235 static bool lowerShiftReservedVGPR(MachineFunction &MF,
236                                    const GCNSubtarget &ST) {
237   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
238   const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
239   // Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
240   if (!PreReservedVGPR)
241     return false;
242 
243   // If there are no free lower VGPRs available, default to using the
244   // pre-reserved register instead.
245   const SIRegisterInfo *TRI = ST.getRegisterInfo();
246   Register LowestAvailableVGPR =
247       TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
248   if (!LowestAvailableVGPR)
249     LowestAvailableVGPR = PreReservedVGPR;
250 
251   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
252   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
253   Optional<int> FI;
254   // Check if we are reserving a CSR. Create a stack object for a possible spill
255   // in the function prologue.
256   if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
257     FI = FrameInfo.CreateSpillStackObject(4, Align(4));
258 
259   // Find saved info about the pre-reserved register.
260   const auto *ReservedVGPRInfoItr =
261       std::find_if(FuncInfo->getSGPRSpillVGPRs().begin(),
262                    FuncInfo->getSGPRSpillVGPRs().end(),
263                    [PreReservedVGPR](const auto &SpillRegInfo) {
264                      return SpillRegInfo.VGPR == PreReservedVGPR;
265                    });
266 
267   assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
268   auto Index =
269       std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
270 
271   FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
272 
273   for (MachineBasicBlock &MBB : MF) {
274     MBB.addLiveIn(LowestAvailableVGPR);
275     MBB.sortUniqueLiveIns();
276   }
277 
278   return true;
279 }
280 
281 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
282   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
283   TII = ST.getInstrInfo();
284   TRI = &TII->getRegisterInfo();
285 
286   VRM = getAnalysisIfAvailable<VirtRegMap>();
287 
288   assert(SaveBlocks.empty() && RestoreBlocks.empty());
289 
290   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
291   // does, but somewhat simpler.
292   calculateSaveRestoreBlocks(MF);
293   bool HasCSRs = spillCalleeSavedRegs(MF);
294 
295   MachineFrameInfo &MFI = MF.getFrameInfo();
296   if (!MFI.hasStackObjects() && !HasCSRs) {
297     SaveBlocks.clear();
298     RestoreBlocks.clear();
299     return false;
300   }
301 
302   MachineRegisterInfo &MRI = MF.getRegInfo();
303   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
304   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
305     && EnableSpillVGPRToAGPR;
306 
307   bool MadeChange = false;
308 
309   const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
310   std::unique_ptr<RegScavenger> RS;
311 
312   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
313   // handled as SpilledToReg in regular PrologEpilogInserter.
314   if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
315       SpillVGPRToAGPR) {
316     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
317     // are spilled to VGPRs, in which case we can eliminate the stack usage.
318     //
319     // This operates under the assumption that only other SGPR spills are users
320     // of the frame index.
321 
322     lowerShiftReservedVGPR(MF, ST);
323 
324     for (MachineBasicBlock &MBB : MF) {
325       MachineBasicBlock::iterator Next;
326       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
327         MachineInstr &MI = *I;
328         Next = std::next(I);
329 
330         if (SpillToAGPR && TII->isVGPRSpill(MI)) {
331           // Try to eliminate stack used by VGPR spills before frame
332           // finalization.
333           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
334                                                      AMDGPU::OpName::vaddr);
335           int FI = MI.getOperand(FIOp).getIndex();
336           Register VReg =
337               TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
338           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
339                                                 TRI->isAGPR(MRI, VReg))) {
340             if (!RS)
341               RS.reset(new RegScavenger());
342 
343             // FIXME: change to enterBasicBlockEnd()
344             RS->enterBasicBlock(MBB);
345             TRI->eliminateFrameIndex(MI, 0, FIOp, RS.get());
346             continue;
347           }
348         }
349 
350         if (!TII->isSGPRSpill(MI))
351           continue;
352 
353         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
354         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
355         if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
356           bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
357           (void)Spilled;
358           assert(Spilled && "failed to spill SGPR to VGPR when allocated");
359         }
360       }
361     }
362 
363     for (MachineBasicBlock &MBB : MF) {
364       for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
365         MBB.addLiveIn(SSpill.VGPR);
366 
367       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
368         MBB.addLiveIn(Reg);
369 
370       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
371         MBB.addLiveIn(Reg);
372 
373       MBB.sortUniqueLiveIns();
374     }
375 
376     MadeChange = true;
377   } else if (FuncInfo->VGPRReservedForSGPRSpill) {
378     FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
379   }
380 
381   SaveBlocks.clear();
382   RestoreBlocks.clear();
383 
384   return MadeChange;
385 }
386