1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// SI implementation of the TargetRegisterInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SIRegisterInfo.h"
15 #include "AMDGPURegisterBankInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "SIInstrInfo.h"
18 #include "SIMachineFunctionInfo.h"
19 #include "MCTargetDesc/AMDGPUInstPrinter.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "llvm/CodeGen/LiveIntervals.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/CodeGen/SlotIndexes.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/LLVMContext.h"
29 
30 using namespace llvm;
31 
32 #define GET_REGINFO_TARGET_DESC
33 #include "AMDGPUGenRegisterInfo.inc"
34 
35 static cl::opt<bool> EnableSpillSGPRToVGPR(
36   "amdgpu-spill-sgpr-to-vgpr",
37   cl::desc("Enable spilling VGPRs to SGPRs"),
38   cl::ReallyHidden,
39   cl::init(true));
40 
41 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
42     : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
43       SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
44 
45   assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
46          getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
47          (getSubRegIndexLaneMask(AMDGPU::lo16) |
48           getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
49            getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
50          "getNumCoveredRegs() will not work with generated subreg masks!");
51 
52   RegPressureIgnoredUnits.resize(getNumRegUnits());
53   RegPressureIgnoredUnits.set(*MCRegUnitIterator(AMDGPU::M0, this));
54   for (auto Reg : AMDGPU::VGPR_HI16RegClass)
55     RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
56 }
57 
58 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
59                                            MCRegister Reg) const {
60   MCRegAliasIterator R(Reg, this, true);
61 
62   for (; R.isValid(); ++R)
63     Reserved.set(*R);
64 }
65 
66 // Forced to be here by one .inc
67 const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
68   const MachineFunction *MF) const {
69   CallingConv::ID CC = MF->getFunction().getCallingConv();
70   switch (CC) {
71   case CallingConv::C:
72   case CallingConv::Fast:
73   case CallingConv::Cold:
74     return CSR_AMDGPU_HighRegs_SaveList;
75   default: {
76     // Dummy to not crash RegisterClassInfo.
77     static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
78     return &NoCalleeSavedReg;
79   }
80   }
81 }
82 
/// Always returns nullptr: this target reports no callee-saved registers that
/// are preserved via copies, regardless of \p MF.
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  return nullptr;
}
87 
88 const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
89                                                      CallingConv::ID CC) const {
90   switch (CC) {
91   case CallingConv::C:
92   case CallingConv::Fast:
93   case CallingConv::Cold:
94     return CSR_AMDGPU_HighRegs_RegMask;
95   default:
96     return nullptr;
97   }
98 }
99 
100 Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
101   const SIFrameLowering *TFI =
102       MF.getSubtarget<GCNSubtarget>().getFrameLowering();
103   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
104   // During ISel lowering we always reserve the stack pointer in entry
105   // functions, but never actually want to reference it when accessing our own
106   // frame. If we need a frame pointer we use it, but otherwise we can just use
107   // an immediate "0" which we represent by returning NoRegister.
108   if (FuncInfo->isEntryFunction()) {
109     return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
110   }
111   return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
112                         : FuncInfo->getStackPtrOffsetReg();
113 }
114 
/// Returns the generated CSR_AMDGPU_AllVGPRs register mask.
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return CSR_AMDGPU_AllVGPRs_RegMask;
}
118 
/// Returns the generated CSR_AMDGPU_AllAllocatableSRegs register mask.
const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}
122 
// FIXME: TableGen should generate something to make this manageable for all
// register classes. At a minimum we could use the opposite of
// composeSubRegIndices and go up from the base 32-bit subreg.
/// Looks up the sub-register index that covers \p NumRegs consecutive 32-bit
/// channels starting at channel \p Channel.
///
/// The table below has one row per tuple width (1 to 4 registers) and one
/// column per starting channel (0 to 31). Columns whose tuple would extend
/// past sub31 hold AMDGPU::NoSubRegister, which is returned as-is.
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                              unsigned NumRegs) {
  // Table of NumRegs sized pieces at every 32-bit offset.
  static const uint16_t SubRegFromChannelTable[][32] = {
      {AMDGPU::sub0,  AMDGPU::sub1,  AMDGPU::sub2,  AMDGPU::sub3,
       AMDGPU::sub4,  AMDGPU::sub5,  AMDGPU::sub6,  AMDGPU::sub7,
       AMDGPU::sub8,  AMDGPU::sub9,  AMDGPU::sub10, AMDGPU::sub11,
       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
       AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
       AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
       AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
       AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31},
      {AMDGPU::sub0_sub1,   AMDGPU::sub1_sub2,    AMDGPU::sub2_sub3,
       AMDGPU::sub3_sub4,   AMDGPU::sub4_sub5,    AMDGPU::sub5_sub6,
       AMDGPU::sub6_sub7,   AMDGPU::sub7_sub8,    AMDGPU::sub8_sub9,
       AMDGPU::sub9_sub10,  AMDGPU::sub10_sub11,  AMDGPU::sub11_sub12,
       AMDGPU::sub12_sub13, AMDGPU::sub13_sub14,  AMDGPU::sub14_sub15,
       AMDGPU::sub15_sub16, AMDGPU::sub16_sub17,  AMDGPU::sub17_sub18,
       AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,  AMDGPU::sub20_sub21,
       AMDGPU::sub21_sub22, AMDGPU::sub22_sub23,  AMDGPU::sub23_sub24,
       AMDGPU::sub24_sub25, AMDGPU::sub25_sub26,  AMDGPU::sub26_sub27,
       AMDGPU::sub27_sub28, AMDGPU::sub28_sub29,  AMDGPU::sub29_sub30,
       AMDGPU::sub30_sub31, AMDGPU::NoSubRegister},
      {AMDGPU::sub0_sub1_sub2,    AMDGPU::sub1_sub2_sub3,
       AMDGPU::sub2_sub3_sub4,    AMDGPU::sub3_sub4_sub5,
       AMDGPU::sub4_sub5_sub6,    AMDGPU::sub5_sub6_sub7,
       AMDGPU::sub6_sub7_sub8,    AMDGPU::sub7_sub8_sub9,
       AMDGPU::sub8_sub9_sub10,   AMDGPU::sub9_sub10_sub11,
       AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
       AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15,
       AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
       AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19,
       AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
       AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23,
       AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
       AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27,
       AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
       AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31,
       AMDGPU::NoSubRegister,     AMDGPU::NoSubRegister},
      {AMDGPU::sub0_sub1_sub2_sub3,     AMDGPU::sub1_sub2_sub3_sub4,
       AMDGPU::sub2_sub3_sub4_sub5,     AMDGPU::sub3_sub4_sub5_sub6,
       AMDGPU::sub4_sub5_sub6_sub7,     AMDGPU::sub5_sub6_sub7_sub8,
       AMDGPU::sub6_sub7_sub8_sub9,     AMDGPU::sub7_sub8_sub9_sub10,
       AMDGPU::sub8_sub9_sub10_sub11,   AMDGPU::sub9_sub10_sub11_sub12,
       AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
       AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16,
       AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
       AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20,
       AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
       AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24,
       AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
       AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28,
       AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
       AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister,
       AMDGPU::NoSubRegister,           AMDGPU::NoSubRegister}};

  // Rows are indexed by NumRegs - 1. A NumRegs of 0 wraps to a huge unsigned
  // value, which the range assert below rejects in assert-enabled builds.
  const unsigned NumRegIndex = NumRegs - 1;

  assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
         "Not implemented");
  assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
  return SubRegFromChannelTable[NumRegIndex][Channel];
}
189 
190 MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
191   const MachineFunction &MF) const {
192   unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
193   MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
194   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
195 }
196 
197 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
198   BitVector Reserved(getNumRegs());
199 
200   // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
201   // this seems likely to result in bugs, so I'm marking them as reserved.
202   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
203   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
204 
205   // M0 has to be reserved so that llvm accepts it as a live-in into a block.
206   reserveRegisterTuples(Reserved, AMDGPU::M0);
207 
208   // Reserve src_vccz, src_execz, src_scc.
209   reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
210   reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
211   reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
212 
213   // Reserve the memory aperture registers.
214   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
215   reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
216   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
217   reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
218 
219   // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
220   reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
221 
222   // Reserve xnack_mask registers - support is not implemented in Codegen.
223   reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
224 
225   // Reserve lds_direct register - support is not implemented in Codegen.
226   reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
227 
228   // Reserve Trap Handler registers - support is not implemented in Codegen.
229   reserveRegisterTuples(Reserved, AMDGPU::TBA);
230   reserveRegisterTuples(Reserved, AMDGPU::TMA);
231   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
232   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
233   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
234   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
235   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
236   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
237   reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
238   reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
239 
240   // Reserve null register - it shall never be allocated
241   reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL);
242 
243   // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
244   // will result in bugs.
245   if (isWave32) {
246     Reserved.set(AMDGPU::VCC);
247     Reserved.set(AMDGPU::VCC_HI);
248   }
249 
250   unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
251   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
252   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
253     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
254     reserveRegisterTuples(Reserved, Reg);
255   }
256 
257   unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
258   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
259   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
260     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
261     reserveRegisterTuples(Reserved, Reg);
262     Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
263     reserveRegisterTuples(Reserved, Reg);
264   }
265 
266   // Reserve all the rest AGPRs if there are no instructions to use it.
267   if (!ST.hasMAIInsts()) {
268     for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
269       unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
270       reserveRegisterTuples(Reserved, Reg);
271     }
272   }
273 
274   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
275 
276   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
277   if (ScratchRSrcReg != AMDGPU::NoRegister) {
278     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
279     // to spill.
280     // TODO: May need to reserve a VGPR if doing LDS spilling.
281     reserveRegisterTuples(Reserved, ScratchRSrcReg);
282   }
283 
284   // We have to assume the SP is needed in case there are calls in the function,
285   // which is detected after the function is lowered. If we aren't really going
286   // to need SP, don't bother reserving it.
287   MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
288 
289   if (StackPtrReg) {
290     reserveRegisterTuples(Reserved, StackPtrReg);
291     assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
292   }
293 
294   MCRegister FrameReg = MFI->getFrameOffsetReg();
295   if (FrameReg) {
296     reserveRegisterTuples(Reserved, FrameReg);
297     assert(!isSubRegister(ScratchRSrcReg, FrameReg));
298   }
299 
300   for (MCRegister Reg : MFI->WWMReservedRegs) {
301     reserveRegisterTuples(Reserved, Reg);
302   }
303 
304   // FIXME: Stop using reserved registers for this.
305   for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
306     reserveRegisterTuples(Reserved, Reg);
307 
308   for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
309     reserveRegisterTuples(Reserved, Reg);
310 
311   return Reserved;
312 }
313 
314 bool SIRegisterInfo::canRealignStack(const MachineFunction &MF) const {
315   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
316   // On entry, the base address is 0, so it can't possibly need any more
317   // alignment.
318 
319   // FIXME: Should be able to specify the entry frame alignment per calling
320   // convention instead.
321   if (Info->isEntryFunction())
322     return false;
323 
324   return TargetRegisterInfo::canRealignStack(MF);
325 }
326 
327 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
328   const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
329   if (Info->isEntryFunction()) {
330     const MachineFrameInfo &MFI = Fn.getFrameInfo();
331     return MFI.hasStackObjects() || MFI.hasCalls();
332   }
333 
334   // May need scavenger for dealing with callee saved registers.
335   return true;
336 }
337 
/// Always returns false for any \p MF; see the rationale below.
bool SIRegisterInfo::requiresFrameIndexScavenging(
  const MachineFunction &MF) const {
  // Do not use frame virtual registers. They used to be used for SGPRs, but
  // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
  // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
  // spill.
  return false;
}
346 
347 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
348   const MachineFunction &MF) const {
349   const MachineFrameInfo &MFI = MF.getFrameInfo();
350   return MFI.hasStackObjects();
351 }
352 
/// Always returns true, independent of the function passed in.
bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}
358 
359 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
360   assert(SIInstrInfo::isMUBUF(*MI));
361 
362   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
363                                           AMDGPU::OpName::offset);
364   return MI->getOperand(OffIdx).getImm();
365 }
366 
367 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
368                                                  int Idx) const {
369   if (!SIInstrInfo::isMUBUF(*MI))
370     return 0;
371 
372   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
373                                            AMDGPU::OpName::vaddr) &&
374          "Should never see frame index on non-address operand");
375 
376   return getMUBUFInstrOffset(MI);
377 }
378 
379 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
380   if (!MI->mayLoadOrStore())
381     return false;
382 
383   int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
384 
385   return !isUInt<12>(FullOffset);
386 }
387 
388 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
389                                                   unsigned BaseReg,
390                                                   int FrameIdx,
391                                                   int64_t Offset) const {
392   MachineBasicBlock::iterator Ins = MBB->begin();
393   DebugLoc DL; // Defaults to "unknown"
394 
395   if (Ins != MBB->end())
396     DL = Ins->getDebugLoc();
397 
398   MachineFunction *MF = MBB->getParent();
399   const SIInstrInfo *TII = ST.getInstrInfo();
400 
401   if (Offset == 0) {
402     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
403       .addFrameIndex(FrameIdx);
404     return;
405   }
406 
407   MachineRegisterInfo &MRI = MF->getRegInfo();
408   Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
409 
410   Register FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
411 
412   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
413     .addImm(Offset);
414   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
415     .addFrameIndex(FrameIdx);
416 
417   TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
418     .addReg(OffsetReg, RegState::Kill)
419     .addReg(FIReg)
420     .addImm(0); // clamp bit
421 }
422 
/// Rewrites the frame-index address operand of \p MI to use \p BaseReg and
/// folds \p Offset into the instruction's immediate `offset` operand.
///
/// In assert-enabled builds this checks that \p MI is a MUBUF instruction
/// with a single frame index located in `vaddr`, that `soffset` is the
/// function's stack pointer offset register, and that the combined offset
/// fits in 12 unsigned bits.
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {
  const SIInstrInfo *TII = ST.getInstrInfo();

#ifndef NDEBUG
  // FIXME: Is it possible to be storing a frame index to itself?
  // Debug-only scan: more than one frame index operand is unexpected.
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
#ifndef NDEBUG
  // MBB and MF are only referenced from the assertions below.
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
#endif
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
  assert(TII->isMUBUF(MI));
  assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
         MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
         "should only be seeing stack pointer offset relative FrameIndex");

  // Combine the existing immediate with the requested extra offset.
  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  // Replace the frame index with a use (not a def) of the base register.
  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}
458 
459 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
460                                         unsigned BaseReg,
461                                         int64_t Offset) const {
462   if (!SIInstrInfo::isMUBUF(*MI))
463     return false;
464 
465   int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
466 
467   return isUInt<12>(NewOffset);
468 }
469 
/// Returns the register class used for pointers; always VGPR_32, regardless
/// of \p MF and \p Kind.
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}
477 
478 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
479 
480   switch (Op) {
481   case AMDGPU::SI_SPILL_S1024_SAVE:
482   case AMDGPU::SI_SPILL_S1024_RESTORE:
483   case AMDGPU::SI_SPILL_V1024_SAVE:
484   case AMDGPU::SI_SPILL_V1024_RESTORE:
485   case AMDGPU::SI_SPILL_A1024_SAVE:
486   case AMDGPU::SI_SPILL_A1024_RESTORE:
487     return 32;
488   case AMDGPU::SI_SPILL_S512_SAVE:
489   case AMDGPU::SI_SPILL_S512_RESTORE:
490   case AMDGPU::SI_SPILL_V512_SAVE:
491   case AMDGPU::SI_SPILL_V512_RESTORE:
492   case AMDGPU::SI_SPILL_A512_SAVE:
493   case AMDGPU::SI_SPILL_A512_RESTORE:
494     return 16;
495   case AMDGPU::SI_SPILL_S256_SAVE:
496   case AMDGPU::SI_SPILL_S256_RESTORE:
497   case AMDGPU::SI_SPILL_V256_SAVE:
498   case AMDGPU::SI_SPILL_V256_RESTORE:
499     return 8;
500   case AMDGPU::SI_SPILL_S160_SAVE:
501   case AMDGPU::SI_SPILL_S160_RESTORE:
502   case AMDGPU::SI_SPILL_V160_SAVE:
503   case AMDGPU::SI_SPILL_V160_RESTORE:
504     return 5;
505   case AMDGPU::SI_SPILL_S128_SAVE:
506   case AMDGPU::SI_SPILL_S128_RESTORE:
507   case AMDGPU::SI_SPILL_V128_SAVE:
508   case AMDGPU::SI_SPILL_V128_RESTORE:
509   case AMDGPU::SI_SPILL_A128_SAVE:
510   case AMDGPU::SI_SPILL_A128_RESTORE:
511     return 4;
512   case AMDGPU::SI_SPILL_S96_SAVE:
513   case AMDGPU::SI_SPILL_S96_RESTORE:
514   case AMDGPU::SI_SPILL_V96_SAVE:
515   case AMDGPU::SI_SPILL_V96_RESTORE:
516     return 3;
517   case AMDGPU::SI_SPILL_S64_SAVE:
518   case AMDGPU::SI_SPILL_S64_RESTORE:
519   case AMDGPU::SI_SPILL_V64_SAVE:
520   case AMDGPU::SI_SPILL_V64_RESTORE:
521   case AMDGPU::SI_SPILL_A64_SAVE:
522   case AMDGPU::SI_SPILL_A64_RESTORE:
523     return 2;
524   case AMDGPU::SI_SPILL_S32_SAVE:
525   case AMDGPU::SI_SPILL_S32_RESTORE:
526   case AMDGPU::SI_SPILL_V32_SAVE:
527   case AMDGPU::SI_SPILL_V32_RESTORE:
528   case AMDGPU::SI_SPILL_A32_SAVE:
529   case AMDGPU::SI_SPILL_A32_RESTORE:
530     return 1;
531   default: llvm_unreachable("Invalid spill opcode");
532   }
533 }
534 
535 static int getOffsetMUBUFStore(unsigned Opc) {
536   switch (Opc) {
537   case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
538     return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
539   case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
540     return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
541   case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
542     return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
543   case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
544     return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
545   case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
546     return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
547   case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
548     return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
549   case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
550     return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
551   default:
552     return -1;
553   }
554 }
555 
556 static int getOffsetMUBUFLoad(unsigned Opc) {
557   switch (Opc) {
558   case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
559     return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
560   case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
561     return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
562   case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
563     return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
564   case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
565     return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
566   case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
567     return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
568   case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
569     return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
570   case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
571     return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
572   case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
573     return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
574   case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
575     return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
576   case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
577     return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
578   case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
579     return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
580   case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
581     return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
582   case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
583     return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
584   default:
585     return -1;
586   }
587 }
588 
589 static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
590                                            MachineBasicBlock::iterator MI,
591                                            int Index,
592                                            unsigned Lane,
593                                            unsigned ValueReg,
594                                            bool IsKill) {
595   MachineBasicBlock *MBB = MI->getParent();
596   MachineFunction *MF = MI->getParent()->getParent();
597   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
598   const SIInstrInfo *TII = ST.getInstrInfo();
599 
600   MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
601 
602   if (Reg == AMDGPU::NoRegister)
603     return MachineInstrBuilder();
604 
605   bool IsStore = MI->mayStore();
606   MachineRegisterInfo &MRI = MF->getRegInfo();
607   auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
608 
609   unsigned Dst = IsStore ? Reg : ValueReg;
610   unsigned Src = IsStore ? ValueReg : Reg;
611   unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
612                                                    : AMDGPU::V_ACCVGPR_READ_B32;
613 
614   return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
615            .addReg(Src, getKillRegState(IsKill));
616 }
617 
618 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
619 // need to handle the case where an SGPR may need to be spilled while spilling.
620 static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
621                                       MachineFrameInfo &MFI,
622                                       MachineBasicBlock::iterator MI,
623                                       int Index,
624                                       int64_t Offset) {
625   const SIInstrInfo *TII = ST.getInstrInfo();
626   MachineBasicBlock *MBB = MI->getParent();
627   const DebugLoc &DL = MI->getDebugLoc();
628   bool IsStore = MI->mayStore();
629 
630   unsigned Opc = MI->getOpcode();
631   int LoadStoreOp = IsStore ?
632     getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
633   if (LoadStoreOp == -1)
634     return false;
635 
636   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
637   if (spillVGPRtoAGPR(ST, MI, Index, 0, Reg->getReg(), false).getInstr())
638     return true;
639 
640   MachineInstrBuilder NewMI =
641       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
642           .add(*Reg)
643           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
644           .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
645           .addImm(Offset)
646           .addImm(0) // glc
647           .addImm(0) // slc
648           .addImm(0) // tfe
649           .addImm(0) // dlc
650           .addImm(0) // swz
651           .cloneMemRefs(*MI);
652 
653   const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
654                                                        AMDGPU::OpName::vdata_in);
655   if (VDataIn)
656     NewMI.add(*VDataIn);
657   return true;
658 }
659 
660 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
661                                          unsigned LoadStoreOp,
662                                          int Index,
663                                          Register ValueReg,
664                                          bool IsKill,
665                                          MCRegister ScratchRsrcReg,
666                                          MCRegister ScratchOffsetReg,
667                                          int64_t InstOffset,
668                                          MachineMemOperand *MMO,
669                                          RegScavenger *RS) const {
670   MachineBasicBlock *MBB = MI->getParent();
671   MachineFunction *MF = MI->getParent()->getParent();
672   const SIInstrInfo *TII = ST.getInstrInfo();
673   const MachineFrameInfo &MFI = MF->getFrameInfo();
674 
675   const MCInstrDesc &Desc = TII->get(LoadStoreOp);
676   const DebugLoc &DL = MI->getDebugLoc();
677   bool IsStore = Desc.mayStore();
678 
679   bool Scavenged = false;
680   MCRegister SOffset = ScratchOffsetReg;
681 
682   const unsigned EltSize = 4;
683   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
684   unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
685   unsigned Size = NumSubRegs * EltSize;
686   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
687   int64_t ScratchOffsetRegDelta = 0;
688 
689   Align Alignment = MFI.getObjectAlign(Index);
690   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
691 
692   Register TmpReg =
693     hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
694                  : Register();
695 
696   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
697 
698   if (!isUInt<12>(Offset + Size - EltSize)) {
699     SOffset = MCRegister();
700 
701     // We currently only support spilling VGPRs to EltSize boundaries, meaning
702     // we can simplify the adjustment of Offset here to just scale with
703     // WavefrontSize.
704     Offset *= ST.getWavefrontSize();
705 
706     // We don't have access to the register scavenger if this function is called
707     // during  PEI::scavengeFrameVirtualRegs().
708     if (RS)
709       SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
710 
711     if (!SOffset) {
712       if (!ScratchOffsetReg) {
713         report_fatal_error("could not scavenge SGPR to spill in entry function");
714       }
715       // There are no free SGPRs, and since we are in the process of spilling
716       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
717       // on SI/CI and on VI it is true until we implement spilling using scalar
718       // stores), we have no way to free up an SGPR.  Our solution here is to
719       // add the offset directly to the ScratchOffset register, and then
720       // subtract the offset after the spill to return ScratchOffset to it's
721       // original value.
722       SOffset = ScratchOffsetReg;
723       ScratchOffsetRegDelta = Offset;
724     } else {
725       Scavenged = true;
726     }
727 
728     if (ScratchOffsetReg == AMDGPU::NoRegister) {
729       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset)
730           .addImm(Offset);
731     } else {
732       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
733           .addReg(ScratchOffsetReg)
734           .addImm(Offset);
735     }
736 
737     Offset = 0;
738   }
739 
740   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
741     Register SubReg = NumSubRegs == 1
742                           ? Register(ValueReg)
743                           : getSubReg(ValueReg, getSubRegFromChannel(i));
744 
745     unsigned SOffsetRegState = 0;
746     unsigned SrcDstRegState = getDefRegState(!IsStore);
747     if (i + 1 == e) {
748       SOffsetRegState |= getKillRegState(Scavenged);
749       // The last implicit use carries the "Kill" flag.
750       SrcDstRegState |= getKillRegState(IsKill);
751     }
752 
753     auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
754 
755     if (!MIB.getInstr()) {
756       unsigned FinalReg = SubReg;
757       if (TmpReg != AMDGPU::NoRegister) {
758         if (IsStore)
759           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
760             .addReg(SubReg, getKillRegState(IsKill));
761         SubReg = TmpReg;
762       }
763 
764       MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
765       MachineMemOperand *NewMMO =
766           MF->getMachineMemOperand(PInfo, MMO->getFlags(), EltSize,
767                                    commonAlignment(Alignment, EltSize * i));
768 
769       MIB = BuildMI(*MBB, MI, DL, Desc)
770                 .addReg(SubReg,
771                         getDefRegState(!IsStore) | getKillRegState(IsKill))
772                 .addReg(ScratchRsrcReg);
773       if (SOffset == AMDGPU::NoRegister) {
774         MIB.addImm(0);
775       } else {
776         MIB.addReg(SOffset, SOffsetRegState);
777       }
778       MIB.addImm(Offset)
779           .addImm(0) // glc
780           .addImm(0) // slc
781           .addImm(0) // tfe
782           .addImm(0) // dlc
783           .addImm(0) // swz
784           .addMemOperand(NewMMO);
785 
786       if (!IsStore && TmpReg != AMDGPU::NoRegister)
787         MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
788                       FinalReg)
789           .addReg(TmpReg, RegState::Kill);
790     }
791 
792     if (NumSubRegs > 1)
793       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
794   }
795 
796   if (ScratchOffsetRegDelta != 0) {
797     // Subtract the offset we added to the ScratchOffset register.
798     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
799         .addReg(ScratchOffsetReg)
800         .addImm(ScratchOffsetRegDelta);
801   }
802 }
803 
804 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
805                                int Index,
806                                RegScavenger *RS,
807                                bool OnlyToVGPR) const {
808   MachineBasicBlock *MBB = MI->getParent();
809   MachineFunction *MF = MBB->getParent();
810   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
811   DenseSet<unsigned> SGPRSpillVGPRDefinedSet;
812 
813   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
814     = MFI->getSGPRToVGPRSpills(Index);
815   bool SpillToVGPR = !VGPRSpills.empty();
816   if (OnlyToVGPR && !SpillToVGPR)
817     return false;
818 
819   const SIInstrInfo *TII = ST.getInstrInfo();
820 
821   Register SuperReg = MI->getOperand(0).getReg();
822   bool IsKill = MI->getOperand(0).isKill();
823   const DebugLoc &DL = MI->getDebugLoc();
824 
825   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
826 
827   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
828                          SuperReg != MFI->getFrameOffsetReg()));
829 
830   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
831 
832   unsigned EltSize = 4;
833   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
834 
835   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
836   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
837 
838   // Scavenged temporary VGPR to use. It must be scavenged once for any number
839   // of spilled subregs.
840   Register TmpVGPR;
841 
842   // SubReg carries the "Kill" flag when SubReg == SuperReg.
843   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
844   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
845     Register SubReg =
846         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
847 
848     if (SpillToVGPR) {
849       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
850 
851       // During SGPR spilling to VGPR, determine if the VGPR is defined. The
852       // only circumstance in which we say it is undefined is when it is the
853       // first spill to this VGPR in the first basic block.
854       bool VGPRDefined = true;
855       if (MBB == &MF->front())
856         VGPRDefined = !SGPRSpillVGPRDefinedSet.insert(Spill.VGPR).second;
857 
858       // Mark the "old value of vgpr" input undef only if this is the first sgpr
859       // spill to this specific vgpr in the first basic block.
860       BuildMI(*MBB, MI, DL,
861               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
862               Spill.VGPR)
863         .addReg(SubReg, getKillRegState(IsKill))
864         .addImm(Spill.Lane)
865         .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef);
866 
867       // FIXME: Since this spills to another register instead of an actual
868       // frame index, we should delete the frame index when all references to
869       // it are fixed.
870     } else {
871       // XXX - Can to VGPR spill fail for some subregisters but not others?
872       if (OnlyToVGPR)
873         return false;
874 
875       // Spill SGPR to a frame index.
876       if (!TmpVGPR.isValid())
877         TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
878 
879       MachineInstrBuilder Mov
880         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
881         .addReg(SubReg, SubKillState);
882 
883       // There could be undef components of a spilled super register.
884       // TODO: Can we detect this and skip the spill?
885       if (NumSubRegs > 1) {
886         // The last implicit use of the SuperReg carries the "Kill" flag.
887         unsigned SuperKillState = 0;
888         if (i + 1 == e)
889           SuperKillState |= getKillRegState(IsKill);
890         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
891       }
892 
893       Align Alignment = FrameInfo.getObjectAlign(Index);
894       MachinePointerInfo PtrInfo
895         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
896       MachineMemOperand *MMO =
897           MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, EltSize,
898                                    commonAlignment(Alignment, EltSize * i));
899       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
900         .addReg(TmpVGPR, RegState::Kill)      // src
901         .addFrameIndex(Index)                 // vaddr
902         .addReg(MFI->getScratchRSrcReg())     // srrsrc
903         .addReg(MFI->getStackPtrOffsetReg())  // soffset
904         .addImm(i * 4)                        // offset
905         .addMemOperand(MMO);
906     }
907   }
908 
909   MI->eraseFromParent();
910   MFI->addToSpilledSGPRs(NumSubRegs);
911   return true;
912 }
913 
914 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
915                                  int Index,
916                                  RegScavenger *RS,
917                                  bool OnlyToVGPR) const {
918   MachineFunction *MF = MI->getParent()->getParent();
919   MachineBasicBlock *MBB = MI->getParent();
920   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
921 
922   ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
923     = MFI->getSGPRToVGPRSpills(Index);
924   bool SpillToVGPR = !VGPRSpills.empty();
925   if (OnlyToVGPR && !SpillToVGPR)
926     return false;
927 
928   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
929   const SIInstrInfo *TII = ST.getInstrInfo();
930   const DebugLoc &DL = MI->getDebugLoc();
931 
932   Register SuperReg = MI->getOperand(0).getReg();
933 
934   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
935 
936   unsigned EltSize = 4;
937 
938   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
939 
940   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
941   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
942 
943   Register TmpVGPR;
944 
945   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
946     Register SubReg =
947         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
948 
949     if (SpillToVGPR) {
950       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
951       auto MIB =
952         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
953                 SubReg)
954         .addReg(Spill.VGPR)
955         .addImm(Spill.Lane);
956 
957       if (NumSubRegs > 1 && i == 0)
958         MIB.addReg(SuperReg, RegState::ImplicitDefine);
959     } else {
960       if (OnlyToVGPR)
961         return false;
962 
963       // Restore SGPR from a stack slot.
964       // FIXME: We should use S_LOAD_DWORD here for VI.
965       if (!TmpVGPR.isValid())
966         TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
967       Align Alignment = FrameInfo.getObjectAlign(Index);
968 
969       MachinePointerInfo PtrInfo
970         = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
971 
972       MachineMemOperand *MMO =
973           MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, EltSize,
974                                    commonAlignment(Alignment, EltSize * i));
975 
976       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpVGPR)
977         .addFrameIndex(Index)                 // vaddr
978         .addReg(MFI->getScratchRSrcReg())     // srsrc
979         .addReg(MFI->getStackPtrOffsetReg())  // soffset
980         .addImm(i * 4)                        // offset
981         .addMemOperand(MMO);
982 
983       auto MIB =
984         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
985         .addReg(TmpVGPR, RegState::Kill);
986 
987       if (NumSubRegs > 1)
988         MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
989     }
990   }
991 
992   MI->eraseFromParent();
993   return true;
994 }
995 
996 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
997 /// a VGPR and the stack slot can be safely eliminated when all other users are
998 /// handled.
999 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1000   MachineBasicBlock::iterator MI,
1001   int FI,
1002   RegScavenger *RS) const {
1003   switch (MI->getOpcode()) {
1004   case AMDGPU::SI_SPILL_S1024_SAVE:
1005   case AMDGPU::SI_SPILL_S512_SAVE:
1006   case AMDGPU::SI_SPILL_S256_SAVE:
1007   case AMDGPU::SI_SPILL_S160_SAVE:
1008   case AMDGPU::SI_SPILL_S128_SAVE:
1009   case AMDGPU::SI_SPILL_S96_SAVE:
1010   case AMDGPU::SI_SPILL_S64_SAVE:
1011   case AMDGPU::SI_SPILL_S32_SAVE:
1012     return spillSGPR(MI, FI, RS, true);
1013   case AMDGPU::SI_SPILL_S1024_RESTORE:
1014   case AMDGPU::SI_SPILL_S512_RESTORE:
1015   case AMDGPU::SI_SPILL_S256_RESTORE:
1016   case AMDGPU::SI_SPILL_S160_RESTORE:
1017   case AMDGPU::SI_SPILL_S128_RESTORE:
1018   case AMDGPU::SI_SPILL_S96_RESTORE:
1019   case AMDGPU::SI_SPILL_S64_RESTORE:
1020   case AMDGPU::SI_SPILL_S32_RESTORE:
1021     return restoreSGPR(MI, FI, RS, true);
1022   default:
1023     llvm_unreachable("not an SGPR spill instruction");
1024   }
1025 }
1026 
1027 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
1028                                         int SPAdj, unsigned FIOperandNum,
1029                                         RegScavenger *RS) const {
1030   MachineFunction *MF = MI->getParent()->getParent();
1031   MachineBasicBlock *MBB = MI->getParent();
1032   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1033   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
1034   const SIInstrInfo *TII = ST.getInstrInfo();
1035   DebugLoc DL = MI->getDebugLoc();
1036 
1037   assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
1038 
1039   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
1040   int Index = MI->getOperand(FIOperandNum).getIndex();
1041 
1042   Register FrameReg = getFrameRegister(*MF);
1043 
1044   switch (MI->getOpcode()) {
1045     // SGPR register spill
1046     case AMDGPU::SI_SPILL_S1024_SAVE:
1047     case AMDGPU::SI_SPILL_S512_SAVE:
1048     case AMDGPU::SI_SPILL_S256_SAVE:
1049     case AMDGPU::SI_SPILL_S160_SAVE:
1050     case AMDGPU::SI_SPILL_S128_SAVE:
1051     case AMDGPU::SI_SPILL_S96_SAVE:
1052     case AMDGPU::SI_SPILL_S64_SAVE:
1053     case AMDGPU::SI_SPILL_S32_SAVE: {
1054       spillSGPR(MI, Index, RS);
1055       break;
1056     }
1057 
1058     // SGPR register restore
1059     case AMDGPU::SI_SPILL_S1024_RESTORE:
1060     case AMDGPU::SI_SPILL_S512_RESTORE:
1061     case AMDGPU::SI_SPILL_S256_RESTORE:
1062     case AMDGPU::SI_SPILL_S160_RESTORE:
1063     case AMDGPU::SI_SPILL_S128_RESTORE:
1064     case AMDGPU::SI_SPILL_S96_RESTORE:
1065     case AMDGPU::SI_SPILL_S64_RESTORE:
1066     case AMDGPU::SI_SPILL_S32_RESTORE: {
1067       restoreSGPR(MI, Index, RS);
1068       break;
1069     }
1070 
1071     // VGPR register spill
1072     case AMDGPU::SI_SPILL_V1024_SAVE:
1073     case AMDGPU::SI_SPILL_V512_SAVE:
1074     case AMDGPU::SI_SPILL_V256_SAVE:
1075     case AMDGPU::SI_SPILL_V160_SAVE:
1076     case AMDGPU::SI_SPILL_V128_SAVE:
1077     case AMDGPU::SI_SPILL_V96_SAVE:
1078     case AMDGPU::SI_SPILL_V64_SAVE:
1079     case AMDGPU::SI_SPILL_V32_SAVE:
1080     case AMDGPU::SI_SPILL_A1024_SAVE:
1081     case AMDGPU::SI_SPILL_A512_SAVE:
1082     case AMDGPU::SI_SPILL_A128_SAVE:
1083     case AMDGPU::SI_SPILL_A64_SAVE:
1084     case AMDGPU::SI_SPILL_A32_SAVE: {
1085       const MachineOperand *VData = TII->getNamedOperand(*MI,
1086                                                          AMDGPU::OpName::vdata);
1087       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1088              MFI->getStackPtrOffsetReg());
1089 
1090       buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
1091             Index,
1092             VData->getReg(), VData->isKill(),
1093             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1094             FrameReg,
1095             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1096             *MI->memoperands_begin(),
1097             RS);
1098       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
1099       MI->eraseFromParent();
1100       break;
1101     }
1102     case AMDGPU::SI_SPILL_V32_RESTORE:
1103     case AMDGPU::SI_SPILL_V64_RESTORE:
1104     case AMDGPU::SI_SPILL_V96_RESTORE:
1105     case AMDGPU::SI_SPILL_V128_RESTORE:
1106     case AMDGPU::SI_SPILL_V160_RESTORE:
1107     case AMDGPU::SI_SPILL_V256_RESTORE:
1108     case AMDGPU::SI_SPILL_V512_RESTORE:
1109     case AMDGPU::SI_SPILL_V1024_RESTORE:
1110     case AMDGPU::SI_SPILL_A32_RESTORE:
1111     case AMDGPU::SI_SPILL_A64_RESTORE:
1112     case AMDGPU::SI_SPILL_A128_RESTORE:
1113     case AMDGPU::SI_SPILL_A512_RESTORE:
1114     case AMDGPU::SI_SPILL_A1024_RESTORE: {
1115       const MachineOperand *VData = TII->getNamedOperand(*MI,
1116                                                          AMDGPU::OpName::vdata);
1117       assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
1118              MFI->getStackPtrOffsetReg());
1119 
1120       buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
1121             Index,
1122             VData->getReg(), VData->isKill(),
1123             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
1124             FrameReg,
1125             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
1126             *MI->memoperands_begin(),
1127             RS);
1128       MI->eraseFromParent();
1129       break;
1130     }
1131 
1132     default: {
1133       const DebugLoc &DL = MI->getDebugLoc();
1134       bool IsMUBUF = TII->isMUBUF(*MI);
1135 
1136       if (!IsMUBUF && !MFI->isEntryFunction()) {
1137         // Convert to a swizzled stack address by scaling by the wave size.
1138         //
1139         // In an entry function/kernel the offset is already swizzled.
1140 
1141         bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
1142         Register ResultReg =
1143             IsCopy ? MI->getOperand(0).getReg()
1144                    : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1145 
1146         int64_t Offset = FrameInfo.getObjectOffset(Index);
1147         if (Offset == 0) {
1148           // XXX - This never happens because of emergency scavenging slot at 0?
1149           BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
1150             .addImm(ST.getWavefrontSizeLog2())
1151             .addReg(FrameReg);
1152         } else {
1153           if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
1154             // Reuse ResultReg in intermediate step.
1155             Register ScaledReg = ResultReg;
1156 
1157             BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
1158                     ScaledReg)
1159               .addImm(ST.getWavefrontSizeLog2())
1160               .addReg(FrameReg);
1161 
1162             const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
1163 
1164             // TODO: Fold if use instruction is another add of a constant.
1165             if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
1166               // FIXME: This can fail
1167               MIB.addImm(Offset);
1168               MIB.addReg(ScaledReg, RegState::Kill);
1169               if (!IsVOP2)
1170                 MIB.addImm(0); // clamp bit
1171             } else {
1172               assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 &&
1173                      "Need to reuse carry out register");
1174 
1175               // Use scavenged unused carry out as offset register.
1176               Register ConstOffsetReg;
1177               if (!isWave32)
1178                 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
1179               else
1180                 ConstOffsetReg = MIB.getReg(1);
1181 
1182               BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
1183                 .addImm(Offset);
1184               MIB.addReg(ConstOffsetReg, RegState::Kill);
1185               MIB.addReg(ScaledReg, RegState::Kill);
1186               MIB.addImm(0); // clamp bit
1187             }
1188           } else {
1189             // We have to produce a carry out, and there isn't a free SGPR pair
1190             // for it. We can keep the whole computation on the SALU to avoid
1191             // clobbering an additional register at the cost of an extra mov.
1192 
1193             // We may have 1 free scratch SGPR even though a carry out is
1194             // unavailable. Only one additional mov is needed.
1195             Register TmpScaledReg =
1196                 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
1197             Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
1198 
1199             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
1200               .addReg(FrameReg)
1201               .addImm(ST.getWavefrontSizeLog2());
1202             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
1203               .addReg(ScaledReg, RegState::Kill)
1204               .addImm(Offset);
1205             BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
1206               .addReg(ScaledReg, RegState::Kill);
1207 
1208             // If there were truly no free SGPRs, we need to undo everything.
1209             if (!TmpScaledReg.isValid()) {
1210               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
1211                 .addReg(ScaledReg, RegState::Kill)
1212                 .addImm(Offset);
1213               BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
1214                 .addReg(FrameReg)
1215                 .addImm(ST.getWavefrontSizeLog2());
1216             }
1217           }
1218         }
1219 
1220         // Don't introduce an extra copy if we're just materializing in a mov.
1221         if (IsCopy)
1222           MI->eraseFromParent();
1223         else
1224           FIOp.ChangeToRegister(ResultReg, false, false, true);
1225         return;
1226       }
1227 
1228       if (IsMUBUF) {
1229         // Disable offen so we don't need a 0 vgpr base.
1230         assert(static_cast<int>(FIOperandNum) ==
1231                AMDGPU::getNamedOperandIdx(MI->getOpcode(),
1232                                           AMDGPU::OpName::vaddr));
1233 
1234         auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
1235         assert((SOffset.isReg() &&
1236                 SOffset.getReg() == MFI->getStackPtrOffsetReg()) ||
1237                (SOffset.isImm() && SOffset.getImm() == 0));
1238         if (SOffset.isReg()) {
1239           if (FrameReg == AMDGPU::NoRegister) {
1240             SOffset.ChangeToImmediate(0);
1241           } else {
1242             SOffset.setReg(FrameReg);
1243           }
1244         }
1245 
1246         int64_t Offset = FrameInfo.getObjectOffset(Index);
1247         int64_t OldImm
1248           = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
1249         int64_t NewOffset = OldImm + Offset;
1250 
1251         if (isUInt<12>(NewOffset) &&
1252             buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
1253           MI->eraseFromParent();
1254           return;
1255         }
1256       }
1257 
1258       // If the offset is simply too big, don't convert to a scratch wave offset
1259       // relative index.
1260 
1261       int64_t Offset = FrameInfo.getObjectOffset(Index);
1262       FIOp.ChangeToImmediate(Offset);
1263       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
1264         Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1265         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
1266           .addImm(Offset);
1267         FIOp.ChangeToRegister(TmpReg, false, false, true);
1268       }
1269     }
1270   }
1271 }
1272 
1273 StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
1274   return AMDGPUInstPrinter::getRegisterName(Reg);
1275 }
1276 
1277 // FIXME: This is very slow. It might be worth creating a map from physreg to
1278 // register class.
1279 const TargetRegisterClass *
1280 SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
1281   static const TargetRegisterClass *const BaseClasses[] = {
1282     &AMDGPU::VGPR_32RegClass,
1283     &AMDGPU::SReg_32RegClass,
1284     &AMDGPU::AGPR_32RegClass,
1285     &AMDGPU::VReg_64RegClass,
1286     &AMDGPU::SReg_64RegClass,
1287     &AMDGPU::AReg_64RegClass,
1288     &AMDGPU::VReg_96RegClass,
1289     &AMDGPU::SReg_96RegClass,
1290     &AMDGPU::VReg_128RegClass,
1291     &AMDGPU::SReg_128RegClass,
1292     &AMDGPU::AReg_128RegClass,
1293     &AMDGPU::VReg_160RegClass,
1294     &AMDGPU::SReg_160RegClass,
1295     &AMDGPU::VReg_256RegClass,
1296     &AMDGPU::SReg_256RegClass,
1297     &AMDGPU::VReg_512RegClass,
1298     &AMDGPU::SReg_512RegClass,
1299     &AMDGPU::AReg_512RegClass,
1300     &AMDGPU::SReg_1024RegClass,
1301     &AMDGPU::VReg_1024RegClass,
1302     &AMDGPU::AReg_1024RegClass,
1303     &AMDGPU::SCC_CLASSRegClass,
1304     &AMDGPU::Pseudo_SReg_32RegClass,
1305     &AMDGPU::Pseudo_SReg_128RegClass,
1306   };
1307 
1308   for (const TargetRegisterClass *BaseClass : BaseClasses) {
1309     if (BaseClass->contains(Reg)) {
1310       return BaseClass;
1311     }
1312   }
1313   return nullptr;
1314 }
1315 
1316 // TODO: It might be helpful to have some target specific flags in
1317 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1318 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
1319   unsigned Size = getRegSizeInBits(*RC);
1320   switch (Size) {
1321   case 32:
1322     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
1323   case 64:
1324     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
1325   case 96:
1326     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
1327   case 128:
1328     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
1329   case 160:
1330     return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
1331   case 256:
1332     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
1333   case 512:
1334     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
1335   case 1024:
1336     return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
1337   case 1:
1338     return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
1339   default:
1340     assert(Size < 32 && "Invalid register class size");
1341     return false;
1342   }
1343 }
1344 
1345 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
1346   unsigned Size = getRegSizeInBits(*RC);
1347   if (Size < 32)
1348     return false;
1349   switch (Size) {
1350   case 32:
1351     return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
1352   case 64:
1353     return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
1354   case 96:
1355     return false;
1356   case 128:
1357     return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
1358   case 160:
1359   case 256:
1360     return false;
1361   case 512:
1362     return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
1363   case 1024:
1364     return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
1365   default:
1366     llvm_unreachable("Invalid register class size");
1367   }
1368 }
1369 
1370 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
1371                                          const TargetRegisterClass *SRC) const {
1372   switch (getRegSizeInBits(*SRC)) {
1373   case 32:
1374     return &AMDGPU::VGPR_32RegClass;
1375   case 64:
1376     return &AMDGPU::VReg_64RegClass;
1377   case 96:
1378     return &AMDGPU::VReg_96RegClass;
1379   case 128:
1380     return &AMDGPU::VReg_128RegClass;
1381   case 160:
1382     return &AMDGPU::VReg_160RegClass;
1383   case 256:
1384     return &AMDGPU::VReg_256RegClass;
1385   case 512:
1386     return &AMDGPU::VReg_512RegClass;
1387   case 1024:
1388     return &AMDGPU::VReg_1024RegClass;
1389   case 1:
1390     return &AMDGPU::VReg_1RegClass;
1391   default:
1392     llvm_unreachable("Invalid register class size");
1393   }
1394 }
1395 
1396 const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
1397                                          const TargetRegisterClass *SRC) const {
1398   switch (getRegSizeInBits(*SRC)) {
1399   case 32:
1400     return &AMDGPU::AGPR_32RegClass;
1401   case 64:
1402     return &AMDGPU::AReg_64RegClass;
1403   case 128:
1404     return &AMDGPU::AReg_128RegClass;
1405   case 512:
1406     return &AMDGPU::AReg_512RegClass;
1407   case 1024:
1408     return &AMDGPU::AReg_1024RegClass;
1409   default:
1410     llvm_unreachable("Invalid register class size");
1411   }
1412 }
1413 
1414 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
1415                                          const TargetRegisterClass *VRC) const {
1416   switch (getRegSizeInBits(*VRC)) {
1417   case 32:
1418     return &AMDGPU::SGPR_32RegClass;
1419   case 64:
1420     return &AMDGPU::SReg_64RegClass;
1421   case 96:
1422     return &AMDGPU::SReg_96RegClass;
1423   case 128:
1424     return &AMDGPU::SGPR_128RegClass;
1425   case 160:
1426     return &AMDGPU::SReg_160RegClass;
1427   case 256:
1428     return &AMDGPU::SReg_256RegClass;
1429   case 512:
1430     return &AMDGPU::SReg_512RegClass;
1431   case 1024:
1432     return &AMDGPU::SReg_1024RegClass;
1433   default:
1434     llvm_unreachable("Invalid register class size");
1435   }
1436 }
1437 
1438 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
1439                          const TargetRegisterClass *RC, unsigned SubIdx) const {
1440   if (SubIdx == AMDGPU::NoSubRegister)
1441     return RC;
1442 
1443   // We can assume that each lane corresponds to one 32-bit register.
1444   unsigned Count = getNumChannelsFromSubReg(SubIdx);
1445   if (isSGPRClass(RC)) {
1446     switch (Count) {
1447     case 1:
1448       return &AMDGPU::SGPR_32RegClass;
1449     case 2:
1450       return &AMDGPU::SReg_64RegClass;
1451     case 3:
1452       return &AMDGPU::SReg_96RegClass;
1453     case 4:
1454       return &AMDGPU::SGPR_128RegClass;
1455     case 5:
1456       return &AMDGPU::SReg_160RegClass;
1457     case 8:
1458       return &AMDGPU::SReg_256RegClass;
1459     case 16:
1460       return &AMDGPU::SReg_512RegClass;
1461     case 32: /* fall-through */
1462     default:
1463       llvm_unreachable("Invalid sub-register class size");
1464     }
1465   } else if (hasAGPRs(RC)) {
1466     switch (Count) {
1467     case 1:
1468       return &AMDGPU::AGPR_32RegClass;
1469     case 2:
1470       return &AMDGPU::AReg_64RegClass;
1471     case 4:
1472       return &AMDGPU::AReg_128RegClass;
1473     case 16:
1474       return &AMDGPU::AReg_512RegClass;
1475     case 32: /* fall-through */
1476     default:
1477       llvm_unreachable("Invalid sub-register class size");
1478     }
1479   } else {
1480     switch (Count) {
1481     case 1:
1482       return &AMDGPU::VGPR_32RegClass;
1483     case 2:
1484       return &AMDGPU::VReg_64RegClass;
1485     case 3:
1486       return &AMDGPU::VReg_96RegClass;
1487     case 4:
1488       return &AMDGPU::VReg_128RegClass;
1489     case 5:
1490       return &AMDGPU::VReg_160RegClass;
1491     case 8:
1492       return &AMDGPU::VReg_256RegClass;
1493     case 16:
1494       return &AMDGPU::VReg_512RegClass;
1495     case 32: /* fall-through */
1496     default:
1497       llvm_unreachable("Invalid sub-register class size");
1498     }
1499   }
1500 }
1501 
1502 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
1503   if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
1504       OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
1505     return !ST.hasMFMAInlineLiteralBug();
1506 
1507   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1508          OpType <= AMDGPU::OPERAND_SRC_LAST;
1509 }
1510 
1511 bool SIRegisterInfo::shouldRewriteCopySrc(
1512   const TargetRegisterClass *DefRC,
1513   unsigned DefSubReg,
1514   const TargetRegisterClass *SrcRC,
1515   unsigned SrcSubReg) const {
1516   // We want to prefer the smallest register class possible, so we don't want to
1517   // stop and rewrite on anything that looks like a subregister
1518   // extract. Operations mostly don't care about the super register class, so we
1519   // only want to stop on the most basic of copies between the same register
1520   // class.
1521   //
1522   // e.g. if we have something like
1523   // %0 = ...
1524   // %1 = ...
1525   // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
1526   // %3 = COPY %2, sub0
1527   //
1528   // We want to look through the COPY to find:
1529   //  => %3 = COPY %0
1530 
1531   // Plain copy.
1532   return getCommonSubClass(DefRC, SrcRC) != nullptr;
1533 }
1534 
1535 /// Returns a register that is not used at any point in the function.
1536 ///        If all registers are used, then this function will return
1537 //         AMDGPU::NoRegister.
1538 MCRegister
1539 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
1540                                    const TargetRegisterClass *RC,
1541                                    const MachineFunction &MF) const {
1542 
1543   for (MCRegister Reg : *RC)
1544     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
1545       return Reg;
1546   return MCRegister();
1547 }
1548 
1549 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
1550                                                    unsigned EltSize) const {
1551   if (EltSize == 4) {
1552     static const int16_t Sub0_31[] = {
1553       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1554       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1555       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1556       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1557       AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
1558       AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
1559       AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
1560       AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31,
1561     };
1562 
1563     static const int16_t Sub0_15[] = {
1564       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1565       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1566       AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
1567       AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
1568     };
1569 
1570     static const int16_t Sub0_7[] = {
1571       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1572       AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
1573     };
1574 
1575     static const int16_t Sub0_4[] = {
1576       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
1577     };
1578 
1579     static const int16_t Sub0_3[] = {
1580       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
1581     };
1582 
1583     static const int16_t Sub0_2[] = {
1584       AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
1585     };
1586 
1587     static const int16_t Sub0_1[] = {
1588       AMDGPU::sub0, AMDGPU::sub1,
1589     };
1590 
1591     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1592     case 32:
1593       return {};
1594     case 64:
1595       return makeArrayRef(Sub0_1);
1596     case 96:
1597       return makeArrayRef(Sub0_2);
1598     case 128:
1599       return makeArrayRef(Sub0_3);
1600     case 160:
1601       return makeArrayRef(Sub0_4);
1602     case 256:
1603       return makeArrayRef(Sub0_7);
1604     case 512:
1605       return makeArrayRef(Sub0_15);
1606     case 1024:
1607       return makeArrayRef(Sub0_31);
1608     default:
1609       llvm_unreachable("unhandled register size");
1610     }
1611   }
1612 
1613   if (EltSize == 8) {
1614     static const int16_t Sub0_31_64[] = {
1615       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1616       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1617       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1618       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
1619       AMDGPU::sub16_sub17, AMDGPU::sub18_sub19,
1620       AMDGPU::sub20_sub21, AMDGPU::sub22_sub23,
1621       AMDGPU::sub24_sub25, AMDGPU::sub26_sub27,
1622       AMDGPU::sub28_sub29, AMDGPU::sub30_sub31
1623     };
1624 
1625     static const int16_t Sub0_15_64[] = {
1626       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1627       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
1628       AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
1629       AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
1630     };
1631 
1632     static const int16_t Sub0_7_64[] = {
1633       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
1634       AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
1635     };
1636 
1637 
1638     static const int16_t Sub0_3_64[] = {
1639       AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
1640     };
1641 
1642     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1643     case 64:
1644       return {};
1645     case 128:
1646       return makeArrayRef(Sub0_3_64);
1647     case 256:
1648       return makeArrayRef(Sub0_7_64);
1649     case 512:
1650       return makeArrayRef(Sub0_15_64);
1651     case 1024:
1652       return makeArrayRef(Sub0_31_64);
1653     default:
1654       llvm_unreachable("unhandled register size");
1655     }
1656   }
1657 
1658   if (EltSize == 16) {
1659 
1660     static const int16_t Sub0_31_128[] = {
1661       AMDGPU::sub0_sub1_sub2_sub3,
1662       AMDGPU::sub4_sub5_sub6_sub7,
1663       AMDGPU::sub8_sub9_sub10_sub11,
1664       AMDGPU::sub12_sub13_sub14_sub15,
1665       AMDGPU::sub16_sub17_sub18_sub19,
1666       AMDGPU::sub20_sub21_sub22_sub23,
1667       AMDGPU::sub24_sub25_sub26_sub27,
1668       AMDGPU::sub28_sub29_sub30_sub31
1669     };
1670 
1671     static const int16_t Sub0_15_128[] = {
1672       AMDGPU::sub0_sub1_sub2_sub3,
1673       AMDGPU::sub4_sub5_sub6_sub7,
1674       AMDGPU::sub8_sub9_sub10_sub11,
1675       AMDGPU::sub12_sub13_sub14_sub15
1676     };
1677 
1678     static const int16_t Sub0_7_128[] = {
1679       AMDGPU::sub0_sub1_sub2_sub3,
1680       AMDGPU::sub4_sub5_sub6_sub7
1681     };
1682 
1683     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1684     case 128:
1685       return {};
1686     case 256:
1687       return makeArrayRef(Sub0_7_128);
1688     case 512:
1689       return makeArrayRef(Sub0_15_128);
1690     case 1024:
1691       return makeArrayRef(Sub0_31_128);
1692     default:
1693       llvm_unreachable("unhandled register size");
1694     }
1695   }
1696 
1697   if (EltSize == 32) {
1698     static const int16_t Sub0_31_256[] = {
1699       AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1700       AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1701       AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1702       AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1703     };
1704 
1705     static const int16_t Sub0_15_256[] = {
1706       AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1707       AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1708     };
1709 
1710     switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1711     case 256:
1712       return {};
1713     case 512:
1714       return makeArrayRef(Sub0_15_256);
1715     case 1024:
1716       return makeArrayRef(Sub0_31_256);
1717     default:
1718       llvm_unreachable("unhandled register size");
1719     }
1720   }
1721 
1722   assert(EltSize == 64 && "unhandled elt size");
1723   static const int16_t Sub0_31_512[] = {
1724     AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1725     AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1726   };
1727 
1728   switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1729   case 512:
1730     return {};
1731   case 1024:
1732     return makeArrayRef(Sub0_31_512);
1733   default:
1734     llvm_unreachable("unhandled register size");
1735   }
1736 }
1737 
1738 const TargetRegisterClass*
1739 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
1740                                   Register Reg) const {
1741   return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
1742 }
1743 
1744 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
1745                             Register Reg) const {
1746   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
1747   assert(RC && "Register class for the reg not found");
1748   return hasVGPRs(RC);
1749 }
1750 
1751 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
1752                             Register Reg) const {
1753   const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
1754   assert(RC && "Register class for the reg not found");
1755   return hasAGPRs(RC);
1756 }
1757 
1758 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
1759                                     const TargetRegisterClass *SrcRC,
1760                                     unsigned SubReg,
1761                                     const TargetRegisterClass *DstRC,
1762                                     unsigned DstSubReg,
1763                                     const TargetRegisterClass *NewRC,
1764                                     LiveIntervals &LIS) const {
1765   unsigned SrcSize = getRegSizeInBits(*SrcRC);
1766   unsigned DstSize = getRegSizeInBits(*DstRC);
1767   unsigned NewSize = getRegSizeInBits(*NewRC);
1768 
1769   // Do not increase size of registers beyond dword, we would need to allocate
1770   // adjacent registers and constraint regalloc more than needed.
1771 
1772   // Always allow dword coalescing.
1773   if (SrcSize <= 32 || DstSize <= 32)
1774     return true;
1775 
1776   return NewSize <= DstSize || NewSize <= SrcSize;
1777 }
1778 
1779 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
1780                                              MachineFunction &MF) const {
1781   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1782 
1783   unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
1784                                                        MF.getFunction());
1785   switch (RC->getID()) {
1786   default:
1787     return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
1788   case AMDGPU::VGPR_32RegClassID:
1789   case AMDGPU::VGPR_LO16RegClassID:
1790   case AMDGPU::VGPR_HI16RegClassID:
1791     return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
1792   case AMDGPU::SGPR_32RegClassID:
1793     return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
1794   }
1795 }
1796 
1797 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
1798                                                 unsigned Idx) const {
1799   if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
1800       Idx == AMDGPU::RegisterPressureSets::AGPR_32)
1801     return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
1802                                const_cast<MachineFunction &>(MF));
1803 
1804   if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
1805     return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
1806                                const_cast<MachineFunction &>(MF));
1807 
1808   llvm_unreachable("Unexpected register pressure set!");
1809 }
1810 
1811 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
1812   static const int Empty[] = { -1 };
1813 
1814   if (RegPressureIgnoredUnits[RegUnit])
1815     return Empty;
1816 
1817   return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
1818 }
1819 
1820 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
1821   // Not a callee saved register.
1822   return AMDGPU::SGPR30_SGPR31;
1823 }
1824 
1825 const TargetRegisterClass *
1826 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
1827                                          const RegisterBank &RB,
1828                                          const MachineRegisterInfo &MRI) const {
1829   switch (Size) {
1830   case 1: {
1831     switch (RB.getID()) {
1832     case AMDGPU::VGPRRegBankID:
1833       return &AMDGPU::VGPR_32RegClass;
1834     case AMDGPU::VCCRegBankID:
1835       return isWave32 ?
1836         &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
1837     case AMDGPU::SGPRRegBankID:
1838       return &AMDGPU::SReg_32RegClass;
1839     default:
1840       llvm_unreachable("unknown register bank");
1841     }
1842   }
1843   case 32:
1844     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1845                                                  &AMDGPU::SReg_32RegClass;
1846   case 64:
1847     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
1848                                                  &AMDGPU::SReg_64RegClass;
1849   case 96:
1850     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1851                                                  &AMDGPU::SReg_96RegClass;
1852   case 128:
1853     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1854                                                  &AMDGPU::SGPR_128RegClass;
1855   case 160:
1856     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
1857                                                  &AMDGPU::SReg_160RegClass;
1858   case 256:
1859     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
1860                                                  &AMDGPU::SReg_256RegClass;
1861   case 512:
1862     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
1863                                                  &AMDGPU::SReg_512RegClass;
1864   case 1024:
1865     return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
1866                                                  &AMDGPU::SReg_1024RegClass;
1867   default:
1868     if (Size < 32)
1869       return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
1870                                                    &AMDGPU::SReg_32RegClass;
1871     return nullptr;
1872   }
1873 }
1874 
1875 const TargetRegisterClass *
1876 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
1877                                          const MachineRegisterInfo &MRI) const {
1878   const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
1879   if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
1880     return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
1881 
1882   const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
1883   return getAllocatableClass(RC);
1884 }
1885 
1886 MCRegister SIRegisterInfo::getVCC() const {
1887   return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
1888 }
1889 
1890 const TargetRegisterClass *
1891 SIRegisterInfo::getRegClass(unsigned RCID) const {
1892   switch ((int)RCID) {
1893   case AMDGPU::SReg_1RegClassID:
1894     return getBoolRC();
1895   case AMDGPU::SReg_1_XEXECRegClassID:
1896     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
1897       : &AMDGPU::SReg_64_XEXECRegClass;
1898   case -1:
1899     return nullptr;
1900   default:
1901     return AMDGPUGenRegisterInfo::getRegClass(RCID);
1902   }
1903 }
1904 
1905 // Find reaching register definition
1906 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
1907                                               MachineInstr &Use,
1908                                               MachineRegisterInfo &MRI,
1909                                               LiveIntervals *LIS) const {
1910   auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
1911   SlotIndex UseIdx = LIS->getInstructionIndex(Use);
1912   SlotIndex DefIdx;
1913 
1914   if (Reg.isVirtual()) {
1915     if (!LIS->hasInterval(Reg))
1916       return nullptr;
1917     LiveInterval &LI = LIS->getInterval(Reg);
1918     LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
1919                                   : MRI.getMaxLaneMaskForVReg(Reg);
1920     VNInfo *V = nullptr;
1921     if (LI.hasSubRanges()) {
1922       for (auto &S : LI.subranges()) {
1923         if ((S.LaneMask & SubLanes) == SubLanes) {
1924           V = S.getVNInfoAt(UseIdx);
1925           break;
1926         }
1927       }
1928     } else {
1929       V = LI.getVNInfoAt(UseIdx);
1930     }
1931     if (!V)
1932       return nullptr;
1933     DefIdx = V->def;
1934   } else {
1935     // Find last def.
1936     for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) {
1937       LiveRange &LR = LIS->getRegUnit(*Units);
1938       if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
1939         if (!DefIdx.isValid() ||
1940             MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
1941                           LIS->getInstructionFromIndex(V->def)))
1942           DefIdx = V->def;
1943       } else {
1944         return nullptr;
1945       }
1946     }
1947   }
1948 
1949   MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
1950 
1951   if (!Def || !MDT.dominates(Def, &Use))
1952     return nullptr;
1953 
1954   assert(Def->modifiesRegister(Reg, this));
1955 
1956   return Def;
1957 }
1958