1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief SI implementation of the TargetRegisterInfo class.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SIRegisterInfo.h"
16 #include "SIInstrInfo.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/RegisterScavenging.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/LLVMContext.h"
24 
25 using namespace llvm;
26 
/// Returns true if \p PSetID occurs in \p PSets, a list of pressure set IDs
/// terminated by -1.
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  const int Wanted = static_cast<int>(PSetID);
  for (const int *Cur = PSets; *Cur != -1; ++Cur) {
    if (*Cur == Wanted)
      return true;
  }
  return false;
}
34 
35 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
36                                          BitVector &PressureSets) const {
37   for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
38     const int *PSets = getRegUnitPressureSets(*U);
39     if (hasPressureSet(PSets, PSetID)) {
40       PressureSets.set(PSetID);
41       break;
42     }
43   }
44 }
45 
46 SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
47                                    SGPRPressureSets(getNumRegPressureSets()),
48                                    VGPRPressureSets(getNumRegPressureSets()) {
49   unsigned NumRegPressureSets = getNumRegPressureSets();
50 
51   SGPRSetID = NumRegPressureSets;
52   VGPRSetID = NumRegPressureSets;
53 
54   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
55     classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
56     classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
57   }
58 
59   // Determine the number of reg units for each pressure set.
60   std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
61   for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
62     const int *PSets = getRegUnitPressureSets(i);
63     for (unsigned j = 0; PSets[j] != -1; ++j) {
64       ++PressureSetRegUnits[PSets[j]];
65     }
66   }
67 
68   unsigned VGPRMax = 0, SGPRMax = 0;
69   for (unsigned i = 0; i < NumRegPressureSets; ++i) {
70     if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
71       VGPRSetID = i;
72       VGPRMax = PressureSetRegUnits[i];
73       continue;
74     }
75     if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
76       SGPRSetID = i;
77       SGPRMax = PressureSetRegUnits[i];
78     }
79   }
80 
81   assert(SGPRSetID < NumRegPressureSets &&
82          VGPRSetID < NumRegPressureSets);
83 }
84 
85 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
86   MCRegAliasIterator R(Reg, this, true);
87 
88   for (; R.isValid(); ++R)
89     Reserved.set(*R);
90 }
91 
92 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
93   const MachineFunction &MF) const {
94   unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
95   unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
96   return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
97 }
98 
99 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
100   const MachineFunction &MF) const {
101   unsigned RegCount = getMaxNumSGPRs(MF);
102   unsigned Reg;
103 
104   // Try to place it in a hole after PrivateSegmentbufferReg.
105   if (RegCount & 3) {
106     // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
107     // alignment constraints, so we have a hole where can put the wave offset.
108     Reg = RegCount - 1;
109   } else {
110     // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
111     // wave offset before it.
112     Reg = RegCount - 5;
113   }
114   return AMDGPU::SGPR_32RegClass.getRegister(Reg);
115 }
116 
117 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
118   BitVector Reserved(getNumRegs());
119   Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
120 
121   // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
122   // this seems likely to result in bugs, so I'm marking them as reserved.
123   reserveRegisterTuples(Reserved, AMDGPU::EXEC);
124   reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
125 
126   // Reserve Trap Handler registers - support is not implemented in Codegen.
127   reserveRegisterTuples(Reserved, AMDGPU::TBA);
128   reserveRegisterTuples(Reserved, AMDGPU::TMA);
129   reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
130   reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
131   reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
132   reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
133   reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
134   reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
135 
136   unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
137   unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
138   for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
139     unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
140     reserveRegisterTuples(Reserved, Reg);
141   }
142 
143   unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
144   unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
145   for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
146     unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
147     reserveRegisterTuples(Reserved, Reg);
148   }
149 
150   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
151 
152   unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
153   if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
154     // Reserve 1 SGPR for scratch wave offset in case we need to spill.
155     reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
156   }
157 
158   unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
159   if (ScratchRSrcReg != AMDGPU::NoRegister) {
160     // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
161     // to spill.
162     // TODO: May need to reserve a VGPR if doing LDS spilling.
163     reserveRegisterTuples(Reserved, ScratchRSrcReg);
164     assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
165   }
166 
167   return Reserved;
168 }
169 
170 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
171   return Fn.getFrameInfo().hasStackObjects();
172 }
173 
174 bool
175 SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
176   return MF.getFrameInfo().hasStackObjects();
177 }
178 
179 bool SIRegisterInfo::requiresVirtualBaseRegisters(
180   const MachineFunction &) const {
181   // There are no special dedicated stack or frame pointers.
182   return true;
183 }
184 
185 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
186   // This helps catch bugs as verifier errors.
187   return true;
188 }
189 
190 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
191                                                  int Idx) const {
192   if (!SIInstrInfo::isMUBUF(*MI))
193     return 0;
194 
195   assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
196                                            AMDGPU::OpName::vaddr) &&
197          "Should never see frame index on non-address operand");
198 
199   int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
200                                           AMDGPU::OpName::offset);
201   return MI->getOperand(OffIdx).getImm();
202 }
203 
204 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
205   return MI->mayLoadOrStore();
206 }
207 
208 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
209                                                   unsigned BaseReg,
210                                                   int FrameIdx,
211                                                   int64_t Offset) const {
212   MachineBasicBlock::iterator Ins = MBB->begin();
213   DebugLoc DL; // Defaults to "unknown"
214 
215   if (Ins != MBB->end())
216     DL = Ins->getDebugLoc();
217 
218   MachineFunction *MF = MBB->getParent();
219   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
220   const SIInstrInfo *TII = Subtarget.getInstrInfo();
221 
222   if (Offset == 0) {
223     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
224       .addFrameIndex(FrameIdx);
225     return;
226   }
227 
228   MachineRegisterInfo &MRI = MF->getRegInfo();
229   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
230   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
231 
232   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
233     .addImm(Offset);
234   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
235     .addReg(UnusedCarry, RegState::Define | RegState::Dead)
236     .addReg(OffsetReg, RegState::Kill)
237     .addFrameIndex(FrameIdx);
238 }
239 
240 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
241                                        int64_t Offset) const {
242 
243   MachineBasicBlock *MBB = MI.getParent();
244   MachineFunction *MF = MBB->getParent();
245   const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
246   const SIInstrInfo *TII = Subtarget.getInstrInfo();
247 
248 #ifndef NDEBUG
249   // FIXME: Is it possible to be storing a frame index to itself?
250   bool SeenFI = false;
251   for (const MachineOperand &MO: MI.operands()) {
252     if (MO.isFI()) {
253       if (SeenFI)
254         llvm_unreachable("should not see multiple frame indices");
255 
256       SeenFI = true;
257     }
258   }
259 #endif
260 
261   MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
262   assert(FIOp && FIOp->isFI() && "frame index must be address operand");
263 
264   assert(TII->isMUBUF(MI));
265 
266   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
267   int64_t NewOffset = OffsetOp->getImm() + Offset;
268   assert(isUInt<12>(NewOffset) && "offset should be legal");
269 
270   FIOp->ChangeToRegister(BaseReg, false);
271   OffsetOp->setImm(NewOffset);
272 }
273 
274 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
275                                         unsigned BaseReg,
276                                         int64_t Offset) const {
277   return SIInstrInfo::isMUBUF(*MI) && isUInt<12>(Offset);
278 }
279 
280 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
281   const MachineFunction &MF, unsigned Kind) const {
282   // This is inaccurate. It depends on the instruction and address space. The
283   // only place where we should hit this is for dealing with frame indexes /
284   // private accesses, so this is correct in that case.
285   return &AMDGPU::VGPR_32RegClass;
286 }
287 
288 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
289 
290   switch (Op) {
291   case AMDGPU::SI_SPILL_S512_SAVE:
292   case AMDGPU::SI_SPILL_S512_RESTORE:
293   case AMDGPU::SI_SPILL_V512_SAVE:
294   case AMDGPU::SI_SPILL_V512_RESTORE:
295     return 16;
296   case AMDGPU::SI_SPILL_S256_SAVE:
297   case AMDGPU::SI_SPILL_S256_RESTORE:
298   case AMDGPU::SI_SPILL_V256_SAVE:
299   case AMDGPU::SI_SPILL_V256_RESTORE:
300     return 8;
301   case AMDGPU::SI_SPILL_S128_SAVE:
302   case AMDGPU::SI_SPILL_S128_RESTORE:
303   case AMDGPU::SI_SPILL_V128_SAVE:
304   case AMDGPU::SI_SPILL_V128_RESTORE:
305     return 4;
306   case AMDGPU::SI_SPILL_V96_SAVE:
307   case AMDGPU::SI_SPILL_V96_RESTORE:
308     return 3;
309   case AMDGPU::SI_SPILL_S64_SAVE:
310   case AMDGPU::SI_SPILL_S64_RESTORE:
311   case AMDGPU::SI_SPILL_V64_SAVE:
312   case AMDGPU::SI_SPILL_V64_RESTORE:
313     return 2;
314   case AMDGPU::SI_SPILL_S32_SAVE:
315   case AMDGPU::SI_SPILL_S32_RESTORE:
316   case AMDGPU::SI_SPILL_V32_SAVE:
317   case AMDGPU::SI_SPILL_V32_RESTORE:
318     return 1;
319   default: llvm_unreachable("Invalid spill opcode");
320   }
321 }
322 
323 void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
324                                            unsigned LoadStoreOp,
325                                            const MachineOperand *SrcDst,
326                                            unsigned ScratchRsrcReg,
327                                            unsigned ScratchOffset,
328                                            int64_t Offset,
329                                            RegScavenger *RS) const {
330 
331   unsigned Value = SrcDst->getReg();
332   bool IsKill = SrcDst->isKill();
333   MachineBasicBlock *MBB = MI->getParent();
334   MachineFunction *MF = MI->getParent()->getParent();
335   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
336   const SIInstrInfo *TII = ST.getInstrInfo();
337 
338   DebugLoc DL = MI->getDebugLoc();
339   bool IsStore = MI->mayStore();
340 
341   bool RanOutOfSGPRs = false;
342   bool Scavenged = false;
343   unsigned SOffset = ScratchOffset;
344   unsigned OriginalImmOffset = Offset;
345 
346   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
347   unsigned Size = NumSubRegs * 4;
348 
349   if (!isUInt<12>(Offset + Size)) {
350     SOffset = AMDGPU::NoRegister;
351 
352     // We don't have access to the register scavenger if this function is called
353     // during  PEI::scavengeFrameVirtualRegs().
354     if (RS)
355       SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);
356 
357     if (SOffset == AMDGPU::NoRegister) {
358       // There are no free SGPRs, and since we are in the process of spilling
359       // VGPRs too.  Since we need a VGPR in order to spill SGPRs (this is true
360       // on SI/CI and on VI it is true until we implement spilling using scalar
361       // stores), we have no way to free up an SGPR.  Our solution here is to
362       // add the offset directly to the ScratchOffset register, and then
363       // subtract the offset after the spill to return ScratchOffset to it's
364       // original value.
365       RanOutOfSGPRs = true;
366       SOffset = ScratchOffset;
367     } else {
368       Scavenged = true;
369     }
370     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
371             .addReg(ScratchOffset)
372             .addImm(Offset);
373     Offset = 0;
374   }
375 
376   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
377     unsigned SubReg = NumSubRegs == 1 ?
378       Value : getSubReg(Value, getSubRegFromChannel(i));
379 
380     unsigned SOffsetRegState = 0;
381     unsigned SrcDstRegState = getDefRegState(!IsStore);
382     if (i + 1 == e) {
383       SOffsetRegState |= getKillRegState(Scavenged);
384       // The last implicit use carries the "Kill" flag.
385       SrcDstRegState |= getKillRegState(IsKill);
386     }
387 
388     BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
389       .addReg(SubReg, getDefRegState(!IsStore))
390       .addReg(ScratchRsrcReg)
391       .addReg(SOffset, SOffsetRegState)
392       .addImm(Offset)
393       .addImm(0) // glc
394       .addImm(0) // slc
395       .addImm(0) // tfe
396       .addReg(Value, RegState::Implicit | SrcDstRegState)
397       .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
398   }
399   if (RanOutOfSGPRs) {
400     // Subtract the offset we added to the ScratchOffset register.
401     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
402             .addReg(ScratchOffset)
403             .addImm(OriginalImmOffset);
404   }
405 }
406 
407 void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
408                                int Index,
409                                RegScavenger *RS) const {
410   MachineFunction *MF = MI->getParent()->getParent();
411   MachineRegisterInfo &MRI = MF->getRegInfo();
412   MachineBasicBlock *MBB = MI->getParent();
413   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
414   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
415   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
416   const SIInstrInfo *TII = ST.getInstrInfo();
417   const DebugLoc &DL = MI->getDebugLoc();
418 
419   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
420   unsigned SuperReg = MI->getOperand(0).getReg();
421   bool IsKill = MI->getOperand(0).isKill();
422 
423   // SubReg carries the "Kill" flag when SubReg == SuperReg.
424   unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
425   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
426     unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
427     unsigned SubReg = NumSubRegs == 1 ?
428       SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
429 
430     struct SIMachineFunctionInfo::SpilledReg Spill =
431       MFI->getSpilledReg(MF, Index, i);
432     if (Spill.hasReg()) {
433       if (SuperReg == AMDGPU::M0) {
434         assert(NumSubRegs == 1);
435         unsigned CopyM0
436           = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
437         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
438           .addReg(SuperReg, getKillRegState(IsKill));
439 
440         // The real spill now kills the temp copy.
441         SubReg = SuperReg = CopyM0;
442         IsKill = true;
443       }
444 
445       BuildMI(*MBB, MI, DL,
446               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
447               Spill.VGPR)
448         .addReg(SubReg, getKillRegState(IsKill))
449         .addImm(Spill.Lane);
450 
451       // FIXME: Since this spills to another register instead of an actual
452       // frame index, we should delete the frame index when all references to
453       // it are fixed.
454     } else {
455       // Spill SGPR to a frame index.
456       // FIXME we should use S_STORE_DWORD here for VI.
457       MachineInstrBuilder Mov
458         = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
459         .addReg(SubReg, SubKillState);
460 
461 
462       // There could be undef components of a spilled super register.
463       // TODO: Can we detect this and skip the spill?
464       if (NumSubRegs > 1) {
465         // The last implicit use of the SuperReg carries the "Kill" flag.
466         unsigned SuperKillState = 0;
467         if (i + 1 == e)
468           SuperKillState |= getKillRegState(IsKill);
469         Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
470       }
471 
472       unsigned Size = FrameInfo.getObjectSize(Index);
473       unsigned Align = FrameInfo.getObjectAlignment(Index);
474       MachinePointerInfo PtrInfo
475         = MachinePointerInfo::getFixedStack(*MF, Index);
476       MachineMemOperand *MMO
477         = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
478                                    Size, Align);
479       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
480         .addReg(TmpReg, RegState::Kill)         // src
481         .addFrameIndex(Index)                   // vaddr
482         .addReg(MFI->getScratchRSrcReg())       // srrsrc
483         .addReg(MFI->getScratchWaveOffsetReg()) // soffset
484         .addImm(i * 4)                          // offset
485         .addMemOperand(MMO);
486     }
487   }
488 
489   MI->eraseFromParent();
490   MFI->addToSpilledSGPRs(NumSubRegs);
491 }
492 
493 void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
494                                  int Index,
495                                  RegScavenger *RS) const {
496   MachineFunction *MF = MI->getParent()->getParent();
497   MachineRegisterInfo &MRI = MF->getRegInfo();
498   MachineBasicBlock *MBB = MI->getParent();
499   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
500   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
501   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
502   const SIInstrInfo *TII = ST.getInstrInfo();
503   const DebugLoc &DL = MI->getDebugLoc();
504 
505   unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
506   unsigned SuperReg = MI->getOperand(0).getReg();
507 
508   // m0 is not allowed as with readlane/writelane, so a temporary SGPR and
509   // extra copy is needed.
510   bool IsM0 = (SuperReg == AMDGPU::M0);
511   if (IsM0) {
512     assert(NumSubRegs == 1);
513     SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
514   }
515 
516   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
517     unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
518     unsigned SubReg = NumSubRegs == 1 ?
519       SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
520 
521     SIMachineFunctionInfo::SpilledReg Spill
522       = MFI->getSpilledReg(MF, Index, i);
523 
524     if (Spill.hasReg()) {
525       BuildMI(*MBB, MI, DL,
526               TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
527               SubReg)
528         .addReg(Spill.VGPR)
529         .addImm(Spill.Lane)
530         .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
531     } else {
532       // Restore SGPR from a stack slot.
533       // FIXME: We should use S_LOAD_DWORD here for VI.
534 
535       unsigned Align = FrameInfo.getObjectAlignment(Index);
536       unsigned Size = FrameInfo.getObjectSize(Index);
537 
538       MachinePointerInfo PtrInfo
539         = MachinePointerInfo::getFixedStack(*MF, Index);
540 
541       MachineMemOperand *MMO = MF->getMachineMemOperand(
542         PtrInfo, MachineMemOperand::MOLoad, Size, Align);
543 
544       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
545         .addFrameIndex(Index)                   // vaddr
546         .addReg(MFI->getScratchRSrcReg())       // srsrc
547         .addReg(MFI->getScratchWaveOffsetReg()) // soffset
548         .addImm(i * 4)                          // offset
549         .addMemOperand(MMO);
550       BuildMI(*MBB, MI, DL,
551               TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
552         .addReg(TmpReg, RegState::Kill)
553         .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
554     }
555   }
556 
557   if (IsM0 && SuperReg != AMDGPU::M0) {
558     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
559       .addReg(SuperReg);
560   }
561 
562   MI->eraseFromParent();
563 }
564 
565 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
566                                         int SPAdj, unsigned FIOperandNum,
567                                         RegScavenger *RS) const {
568   MachineFunction *MF = MI->getParent()->getParent();
569   MachineRegisterInfo &MRI = MF->getRegInfo();
570   MachineBasicBlock *MBB = MI->getParent();
571   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
572   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
573   const SISubtarget &ST =  MF->getSubtarget<SISubtarget>();
574   const SIInstrInfo *TII = ST.getInstrInfo();
575   DebugLoc DL = MI->getDebugLoc();
576 
577   MachineOperand &FIOp = MI->getOperand(FIOperandNum);
578   int Index = MI->getOperand(FIOperandNum).getIndex();
579 
580   switch (MI->getOpcode()) {
581     // SGPR register spill
582     case AMDGPU::SI_SPILL_S512_SAVE:
583     case AMDGPU::SI_SPILL_S256_SAVE:
584     case AMDGPU::SI_SPILL_S128_SAVE:
585     case AMDGPU::SI_SPILL_S64_SAVE:
586     case AMDGPU::SI_SPILL_S32_SAVE: {
587       spillSGPR(MI, Index, RS);
588       break;
589     }
590 
591     // SGPR register restore
592     case AMDGPU::SI_SPILL_S512_RESTORE:
593     case AMDGPU::SI_SPILL_S256_RESTORE:
594     case AMDGPU::SI_SPILL_S128_RESTORE:
595     case AMDGPU::SI_SPILL_S64_RESTORE:
596     case AMDGPU::SI_SPILL_S32_RESTORE: {
597       restoreSGPR(MI, Index, RS);
598       break;
599     }
600 
601     // VGPR register spill
602     case AMDGPU::SI_SPILL_V512_SAVE:
603     case AMDGPU::SI_SPILL_V256_SAVE:
604     case AMDGPU::SI_SPILL_V128_SAVE:
605     case AMDGPU::SI_SPILL_V96_SAVE:
606     case AMDGPU::SI_SPILL_V64_SAVE:
607     case AMDGPU::SI_SPILL_V32_SAVE:
608       buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
609             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
610             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
611             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
612             FrameInfo.getObjectOffset(Index) +
613             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
614       MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
615       MI->eraseFromParent();
616       break;
617     case AMDGPU::SI_SPILL_V32_RESTORE:
618     case AMDGPU::SI_SPILL_V64_RESTORE:
619     case AMDGPU::SI_SPILL_V96_RESTORE:
620     case AMDGPU::SI_SPILL_V128_RESTORE:
621     case AMDGPU::SI_SPILL_V256_RESTORE:
622     case AMDGPU::SI_SPILL_V512_RESTORE: {
623       buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
624             TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
625             TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
626             TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
627             FrameInfo.getObjectOffset(Index) +
628             TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
629       MI->eraseFromParent();
630       break;
631     }
632 
633     default: {
634       int64_t Offset = FrameInfo.getObjectOffset(Index);
635       FIOp.ChangeToImmediate(Offset);
636       if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
637         unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
638         BuildMI(*MBB, MI, MI->getDebugLoc(),
639                 TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
640                 .addImm(Offset);
641         FIOp.ChangeToRegister(TmpReg, false, false, true);
642       }
643     }
644   }
645 }
646 
647 // FIXME: This is very slow. It might be worth creating a map from physreg to
648 // register class.
649 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
650   assert(!TargetRegisterInfo::isVirtualRegister(Reg));
651 
652   static const TargetRegisterClass *const BaseClasses[] = {
653     &AMDGPU::VGPR_32RegClass,
654     &AMDGPU::SReg_32RegClass,
655     &AMDGPU::VReg_64RegClass,
656     &AMDGPU::SReg_64RegClass,
657     &AMDGPU::VReg_96RegClass,
658     &AMDGPU::VReg_128RegClass,
659     &AMDGPU::SReg_128RegClass,
660     &AMDGPU::VReg_256RegClass,
661     &AMDGPU::SReg_256RegClass,
662     &AMDGPU::VReg_512RegClass,
663     &AMDGPU::SReg_512RegClass,
664     &AMDGPU::SCC_CLASSRegClass,
665   };
666 
667   for (const TargetRegisterClass *BaseClass : BaseClasses) {
668     if (BaseClass->contains(Reg)) {
669       return BaseClass;
670     }
671   }
672   return nullptr;
673 }
674 
675 // TODO: It might be helpful to have some target specific flags in
676 // TargetRegisterClass to mark which classes are VGPRs to make this trivial.
677 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
678   switch (RC->getSize()) {
679   case 0: return false;
680   case 1: return false;
681   case 4:
682     return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
683   case 8:
684     return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
685   case 12:
686     return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
687   case 16:
688     return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
689   case 32:
690     return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
691   case 64:
692     return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
693   default:
694     llvm_unreachable("Invalid register class size");
695   }
696 }
697 
698 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
699                                          const TargetRegisterClass *SRC) const {
700   switch (SRC->getSize()) {
701   case 4:
702     return &AMDGPU::VGPR_32RegClass;
703   case 8:
704     return &AMDGPU::VReg_64RegClass;
705   case 12:
706     return &AMDGPU::VReg_96RegClass;
707   case 16:
708     return &AMDGPU::VReg_128RegClass;
709   case 32:
710     return &AMDGPU::VReg_256RegClass;
711   case 64:
712     return &AMDGPU::VReg_512RegClass;
713   default:
714     llvm_unreachable("Invalid register class size");
715   }
716 }
717 
718 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
719                                          const TargetRegisterClass *VRC) const {
720   switch (VRC->getSize()) {
721   case 4:
722     return &AMDGPU::SGPR_32RegClass;
723   case 8:
724     return &AMDGPU::SReg_64RegClass;
725   case 16:
726     return &AMDGPU::SReg_128RegClass;
727   case 32:
728     return &AMDGPU::SReg_256RegClass;
729   case 64:
730     return &AMDGPU::SReg_512RegClass;
731   default:
732     llvm_unreachable("Invalid register class size");
733   }
734 }
735 
736 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
737                          const TargetRegisterClass *RC, unsigned SubIdx) const {
738   if (SubIdx == AMDGPU::NoSubRegister)
739     return RC;
740 
741   // We can assume that each lane corresponds to one 32-bit register.
742   unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx));
743   if (isSGPRClass(RC)) {
744     switch (Count) {
745     case 1:
746       return &AMDGPU::SGPR_32RegClass;
747     case 2:
748       return &AMDGPU::SReg_64RegClass;
749     case 4:
750       return &AMDGPU::SReg_128RegClass;
751     case 8:
752       return &AMDGPU::SReg_256RegClass;
753     case 16: /* fall-through */
754     default:
755       llvm_unreachable("Invalid sub-register class size");
756     }
757   } else {
758     switch (Count) {
759     case 1:
760       return &AMDGPU::VGPR_32RegClass;
761     case 2:
762       return &AMDGPU::VReg_64RegClass;
763     case 3:
764       return &AMDGPU::VReg_96RegClass;
765     case 4:
766       return &AMDGPU::VReg_128RegClass;
767     case 8:
768       return &AMDGPU::VReg_256RegClass;
769     case 16: /* fall-through */
770     default:
771       llvm_unreachable("Invalid sub-register class size");
772     }
773   }
774 }
775 
776 bool SIRegisterInfo::shouldRewriteCopySrc(
777   const TargetRegisterClass *DefRC,
778   unsigned DefSubReg,
779   const TargetRegisterClass *SrcRC,
780   unsigned SrcSubReg) const {
781   // We want to prefer the smallest register class possible, so we don't want to
782   // stop and rewrite on anything that looks like a subregister
783   // extract. Operations mostly don't care about the super register class, so we
784   // only want to stop on the most basic of copies between the same register
785   // class.
786   //
787   // e.g. if we have something like
788   // vreg0 = ...
789   // vreg1 = ...
790   // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
791   // vreg3 = COPY vreg2, sub0
792   //
793   // We want to look through the COPY to find:
794   //  => vreg3 = COPY vreg0
795 
796   // Plain copy.
797   return getCommonSubClass(DefRC, SrcRC) != nullptr;
798 }
799 
800 bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
801   return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
802          OpType == AMDGPU::OPERAND_REG_IMM32_FP;
803 }
804 
805 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
806   if (opCanUseLiteralConstant(OpType))
807     return true;
808 
809   return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
810          OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
811 }
812 
813 // FIXME: Most of these are flexible with HSA and we don't need to reserve them
814 // as input registers if unused. Whether the dispatch ptr is necessary should be
815 // easy to detect from used intrinsics. Scratch setup is harder to know.
816 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
817                                            enum PreloadedValue Value) const {
818 
819   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
820   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
821   (void)ST;
822   switch (Value) {
823   case SIRegisterInfo::WORKGROUP_ID_X:
824     assert(MFI->hasWorkGroupIDX());
825     return MFI->WorkGroupIDXSystemSGPR;
826   case SIRegisterInfo::WORKGROUP_ID_Y:
827     assert(MFI->hasWorkGroupIDY());
828     return MFI->WorkGroupIDYSystemSGPR;
829   case SIRegisterInfo::WORKGROUP_ID_Z:
830     assert(MFI->hasWorkGroupIDZ());
831     return MFI->WorkGroupIDZSystemSGPR;
832   case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
833     return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
834   case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
835     assert(ST.isAmdCodeObjectV2() &&
836            "Non-CodeObjectV2 ABI currently uses relocations");
837     assert(MFI->hasPrivateSegmentBuffer());
838     return MFI->PrivateSegmentBufferUserSGPR;
839   case SIRegisterInfo::KERNARG_SEGMENT_PTR:
840     assert(MFI->hasKernargSegmentPtr());
841     return MFI->KernargSegmentPtrUserSGPR;
842   case SIRegisterInfo::DISPATCH_ID:
843     assert(MFI->hasDispatchID());
844     return MFI->DispatchIDUserSGPR;
845   case SIRegisterInfo::FLAT_SCRATCH_INIT:
846     assert(MFI->hasFlatScratchInit());
847     return MFI->FlatScratchInitUserSGPR;
848   case SIRegisterInfo::DISPATCH_PTR:
849     assert(MFI->hasDispatchPtr());
850     return MFI->DispatchPtrUserSGPR;
851   case SIRegisterInfo::QUEUE_PTR:
852     assert(MFI->hasQueuePtr());
853     return MFI->QueuePtrUserSGPR;
854   case SIRegisterInfo::WORKITEM_ID_X:
855     assert(MFI->hasWorkItemIDX());
856     return AMDGPU::VGPR0;
857   case SIRegisterInfo::WORKITEM_ID_Y:
858     assert(MFI->hasWorkItemIDY());
859     return AMDGPU::VGPR1;
860   case SIRegisterInfo::WORKITEM_ID_Z:
861     assert(MFI->hasWorkItemIDZ());
862     return AMDGPU::VGPR2;
863   }
864   llvm_unreachable("unexpected preloaded value type");
865 }
866 
867 /// \brief Returns a register that is not used at any point in the function.
868 ///        If all registers are used, then this function will return
869 //         AMDGPU::NoRegister.
870 unsigned
871 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
872                                    const TargetRegisterClass *RC,
873                                    const MachineFunction &MF) const {
874 
875   for (unsigned Reg : *RC)
876     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
877       return Reg;
878   return AMDGPU::NoRegister;
879 }
880 
881 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
882                             unsigned Reg) const {
883   const TargetRegisterClass *RC;
884   if (TargetRegisterInfo::isVirtualRegister(Reg))
885     RC = MRI.getRegClass(Reg);
886   else
887     RC = getPhysRegClass(Reg);
888 
889   return hasVGPRs(RC);
890 }
891 
892 unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
893   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
894     return 800;
895   return 512;
896 }
897 
898 unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
899   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
900     return 102;
901   return 104;
902 }
903 
904 unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST) const {
905   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
906     return 6; // VCC, FLAT_SCRATCH, XNACK.
907   return 2; // VCC.
908 }
909 
910 unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
911                                         unsigned WavesPerEU) const {
912   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
913     switch (WavesPerEU) {
914       case 0:  return 0;
915       case 10: return 0;
916       case 9:  return 0;
917       case 8:  return 81;
918       default: return 97;
919     }
920   } else {
921     switch (WavesPerEU) {
922       case 0:  return 0;
923       case 10: return 0;
924       case 9:  return 49;
925       case 8:  return 57;
926       case 7:  return 65;
927       case 6:  return 73;
928       case 5:  return 81;
929       default: return 97;
930     }
931   }
932 }
933 
934 unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
935                                         unsigned WavesPerEU) const {
936   if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
937     switch (WavesPerEU) {
938       case 0:  return 80;
939       case 10: return 80;
940       case 9:  return 80;
941       case 8:  return 96;
942       default: return getNumAddressableSGPRs(ST);
943     }
944   } else {
945     switch (WavesPerEU) {
946       case 0:  return 48;
947       case 10: return 48;
948       case 9:  return 56;
949       case 8:  return 64;
950       case 7:  return 72;
951       case 6:  return 80;
952       case 5:  return 96;
953       default: return getNumAddressableSGPRs(ST);
954     }
955   }
956 }
957 
958 unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
959   const Function &F = *MF.getFunction();
960 
961   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
962   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
963 
964   // Compute maximum number of SGPRs function can use using default/requested
965   // minimum number of waves per execution unit.
966   std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
967   unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first);
968 
969   // Check if maximum number of SGPRs was explicitly requested using
970   // "amdgpu-num-sgpr" attribute.
971   if (F.hasFnAttribute("amdgpu-num-sgpr")) {
972     unsigned Requested = AMDGPU::getIntegerAttribute(
973       F, "amdgpu-num-sgpr", MaxNumSGPRs);
974 
975     // Make sure requested value does not violate subtarget's specifications.
976     if (Requested && Requested <= getNumReservedSGPRs(ST))
977       Requested = 0;
978 
979     // Make sure requested value is compatible with values implied by
980     // default/requested minimum/maximum number of waves per execution unit.
981     if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first))
982       Requested = 0;
983     if (WavesPerEU.second &&
984         Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
985       Requested = 0;
986 
987     if (Requested)
988       MaxNumSGPRs = Requested;
989   }
990 
991   if (ST.hasSGPRInitBug())
992     MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
993 
994   return MaxNumSGPRs - getNumReservedSGPRs(ST);
995 }
996 
997 unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
998   const SISubtarget &ST) const {
999   if (ST.debuggerReserveRegs())
1000     return 4;
1001   return 0;
1002 }
1003 
1004 unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
1005   switch (WavesPerEU) {
1006     case 0:  return 0;
1007     case 10: return 0;
1008     case 9:  return 25;
1009     case 8:  return 29;
1010     case 7:  return 33;
1011     case 6:  return 37;
1012     case 5:  return 41;
1013     case 4:  return 49;
1014     case 3:  return 65;
1015     case 2:  return 85;
1016     default: return 129;
1017   }
1018 }
1019 
1020 unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
1021   switch (WavesPerEU) {
1022     case 0:  return 24;
1023     case 10: return 24;
1024     case 9:  return 28;
1025     case 8:  return 32;
1026     case 7:  return 36;
1027     case 6:  return 40;
1028     case 5:  return 48;
1029     case 4:  return 64;
1030     case 3:  return 84;
1031     case 2:  return 128;
1032     default: return getTotalNumVGPRs();
1033   }
1034 }
1035 
1036 unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
1037   const Function &F = *MF.getFunction();
1038 
1039   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
1040   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1041 
1042   // Compute maximum number of VGPRs function can use using default/requested
1043   // minimum number of waves per execution unit.
1044   std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
1045   unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
1046 
1047   // Check if maximum number of VGPRs was explicitly requested using
1048   // "amdgpu-num-vgpr" attribute.
1049   if (F.hasFnAttribute("amdgpu-num-vgpr")) {
1050     unsigned Requested = AMDGPU::getIntegerAttribute(
1051       F, "amdgpu-num-vgpr", MaxNumVGPRs);
1052 
1053     // Make sure requested value does not violate subtarget's specifications.
1054     if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
1055       Requested = 0;
1056 
1057     // Make sure requested value is compatible with values implied by
1058     // default/requested minimum/maximum number of waves per execution unit.
1059     if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
1060       Requested = 0;
1061     if (WavesPerEU.second &&
1062         Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
1063       Requested = 0;
1064 
1065     if (Requested)
1066       MaxNumVGPRs = Requested;
1067   }
1068 
1069   return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
1070 }
1071