//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : InstructionSelector(), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

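// Return true if \p Reg is the scalar condition code: either the physical
// SCC register itself, or a virtual 1-bit value assigned to the SCC register
// bank.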
static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) {
  if (Register::isPhysicalRegister(Reg))
    return Reg == AMDGPU::SCC;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    // FIXME: This is ambiguous for wave32. This could be SCC or VCC, but the
    // context of the register bank has been lost.
    if (RC->getID() != AMDGPU::SReg_32_XM0RegClassID)
      return false;
    const LLT Ty = MRI.getType(Reg);
    return Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::SCCRegBankID;
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  if (Register::isPhysicalRegister(Reg))
    return Reg == TRI.getVCC();

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    return RC->hasSuperClassEq(TRI.getBoolRC()) &&
           Ty.isValid() && Ty.getSizeInBits() == 1;
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

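// Select a generic COPY. A copy into a VCC-bank boolean from a non-VCC
// source cannot remain a plain copy; it is materialized as a V_CMP_NE_U32
// of the source against zero.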
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, MRI);
    }

    if (!isVCC(SrcReg, MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), MRI))
        return false;

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
        .addImm(0)
        .addReg(SrcReg);

      if (!MRI.getRegClassOrNull(SrcReg))
        MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, MRI))
      return false;

    // Don't constrain the source register to a class so the def instruction
    // handles it (unless it's undef).
    //
    // FIXME: This is a hack. When selecting the def, we need to know
    // specifically that the result is VCCRegBank, and not just an SGPR
    // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
    if (Src.isUndef()) {
      const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, MRI);
      if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
        return false;
    }

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI.getType(DefReg);

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI.getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    if (RB.getID() == AMDGPU::SCCRegBankID) {
      LLVM_DEBUG(dbgs() << "illegal scc phi\n");
      return false;
    }

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB, MRI);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
}

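// Extract the 32-bit half selected by \p SubIdx from the 64-bit operand
// \p MO, either as a subregister copy into a new register of \p SubRC or by
// splitting an immediate.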
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MRI.createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static int64_t getConstant(const MachineInstr *MI) {
  return MI->getOperand(1).getCImm()->getSExtValue();
}

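// Map a generic G_AND/G_OR/G_XOR opcode to the corresponding 32- or 64-bit
// SALU instruction.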
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &Dst = I.getOperand(0);
  MachineOperand &Src0 = I.getOperand(1);
  MachineOperand &Src1 = I.getOperand(2);
  Register DstReg = Dst.getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    const TargetRegisterClass *RC = TRI.getBoolRC();
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
                                           RC == &AMDGPU::SReg_64RegClass);
    I.setDesc(TII.get(InstOpc));

    // FIXME: Hack to avoid turning the register bank into a register class.
    // The selector for G_ICMP relies on seeing the register bank for the result
    // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
    // be ambiguous whether it's a scalar or vector bool.
    if (Src0.isUndef() && !MRI.getRegClassOrNull(Src0.getReg()))
      MRI.setRegClass(Src0.getReg(), RC);
    if (Src1.isUndef() && !MRI.getRegClassOrNull(Src1.getReg()))
      MRI.setRegClass(Src1.getReg(), RC);

    return RBI.constrainGenericRegister(DstReg, *RC, MRI);
  }

  // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
  // the result?
  if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
    unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
    I.setDesc(TII.get(InstOpc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2));
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_I32_e64 : AMDGPU::V_ADD_I32_e64;

    Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

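  // 64-bit case: split into halves, adding the low 32 bits first to produce
  // a carry that the high half consumes, then recombine the two results with
  // a REG_SEQUENCE.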
  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI.createVirtualRegister(&HalfRC);
  Register DstHi = MRI.createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2);
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI.createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);

  if (!RBI.constrainGenericRegister(DstReg, RC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  assert(I.getOperand(2).getImm() % 32 == 0);
  unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
  const DebugLoc &DL = I.getDebugLoc();
  MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
                               I.getOperand(0).getReg())
                               .addReg(I.getOperand(1).getReg(), 0, SubReg);

  for (const MachineOperand &MO : Copy->operands()) {
    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return false;

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
    TRI.getRegClassForSizeOnBank(DstSize, *DstBank, MRI);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg0);
  LLT SrcTy = MRI.getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  const TargetRegisterClass *SrcRC =
    TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank, MRI);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const unsigned SrcFlags = getUndefRegState(Src.isUndef());

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, SrcFlags, SubRegs[I]);

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const MachineOperand &MO = I.getOperand(0);

  // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
  // regbank check here is to know why getConstrainedRegClassForOperand failed.
  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, MRI);
  if ((!RC && !MRI.getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI.getType(Src1Reg);
  if (Src1Ty.getSizeInBits() != 32)
    return false;

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0)
    return false;

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32);
  const DebugLoc &DL = I.getDebugLoc();

  MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
                               .addDef(I.getOperand(0).getReg())
                               .addReg(Src0Reg)
                               .addReg(Src1Reg)
                               .addImm(SubReg);

  for (const MachineOperand &MO : Ins->operands()) {
    if (!MO.isReg())
      continue;
    if (Register::isPhysicalRegister(MO.getReg()))
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
  }
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    MachineBasicBlock *BB = I.getParent();
    MachineFunction *MF = BB->getParent();
    MachineRegisterInfo &MRI = MF->getRegInfo();

    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
      .add(I.getOperand(0))
      .add(I.getOperand(2))
      .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg }) {
      if (!MRI.getRegClassOrNull(Reg))
        MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    }

    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

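// Map an integer predicate to the VALU compare opcode for 32- or 64-bit
// comparisons, or -1 if there is no usable instruction for the size.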
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  if (Size != 32 && Size != 64)
    return -1;
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
  case CmpInst::ICMP_EQ:
    return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
  case CmpInst::ICMP_SGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
  case CmpInst::ICMP_SGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
  case CmpInst::ICMP_SLT:
    return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
  case CmpInst::ICMP_SLE:
    return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
  case CmpInst::ICMP_UGT:
    return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
  case CmpInst::ICMP_UGE:
    return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
  case CmpInst::ICMP_ULT:
    return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
  case CmpInst::ICMP_ULE:
    return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
  }
}

int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    if (!STI.hasScalarCompareEq64())
      return -1;

    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size != 32)
    return -1;

  switch (P) {
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}

bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);

  auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();

  Register CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
      .addReg(AMDGPU::SCC);
    bool Ret =
        constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
        RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, MRI);
    I.eraseFromParent();
    return Ret;
  }

  int Opcode = getV_CMPOpcode(Pred, Size);
  if (Opcode == -1)
    return false;

  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
            I.getOperand(0).getReg())
            .add(I.getOperand(2))
            .add(I.getOperand(3));
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               *TRI.getBoolRC(), MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

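// Build an EXP or EXP_DONE instruction with the given export target, four
// source registers, and control bits.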
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
         unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
         unsigned VM, bool Compr, unsigned Enabled, bool Done) {
  const DebugLoc &DL = Insert->getDebugLoc();
  MachineBasicBlock &BB = *Insert->getParent();
  unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
  return BuildMI(BB, Insert, DL, TII.get(Opcode))
          .addImm(Tgt)
          .addReg(Reg0)
          .addReg(Reg1)
          .addReg(Reg2)
          .addReg(Reg3)
          .addImm(VM)
          .addImm(Compr)
          .addImm(Enabled);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  unsigned IntrinsicID = I.getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_exp: {
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));

    MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
                                 I.getOperand(4).getReg(),
                                 I.getOperand(5).getReg(),
                                 I.getOperand(6).getReg(),
                                 VM, false, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_exp_compr: {
    const DebugLoc &DL = I.getDebugLoc();
    int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
    int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
    Register Reg0 = I.getOperand(3).getReg();
    Register Reg1 = I.getOperand(4).getReg();
    Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
    int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
    MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
                                 true, Enabled, Done);

    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
  }
  case Intrinsic::amdgcn_end_cf: {
    // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
    // SelectionDAG uses for wave32 vs wave64.
    BuildMI(*BB, &I, I.getDebugLoc(),
            TII.get(AMDGPU::SI_END_CF))
      .add(I.getOperand(1));

    Register Reg = I.getOperand(1).getReg();
    I.eraseFromParent();

    if (!MRI.getRegClassOrNull(Reg))
      MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
    return true;
  }
  default:
    return selectImpl(I, *CoverageInfo);
  }
}

bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size <= 32 || Size == 64);
  const MachineOperand &CCOp = I.getOperand(1);
  Register CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // represent it. So we need to set the register class manually here.
    if (!MRI.getRegClassOrNull(CCReg))
        MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
               constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return Ret;
  }

  // Wide VGPR select should have been split in RegBankSelect.
  if (Size > 32)
    return false;

  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
              .addImm(0)
              .add(I.getOperand(3))
              .addImm(0)
              .add(I.getOperand(2))
              .add(I.getOperand(1));

  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}

bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

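// Map a bit width to the subregister index covering it, rounding odd sizes
// up to the next power of two. Returns -1 for widths above 256 bits.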
static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    if (Size > 256)
      return -1;
    return sizeToSubRegIndex(PowerOf2Ceil(Size));
  }
}

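// Select G_TRUNC as a COPY; truncating a source wider than 32 bits becomes a
// copy from the subregister covering the destination size.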
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  if (SrcRB != DstRB)
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  unsigned SrcSize = SrcTy.getSizeInBits();

  const TargetRegisterClass *SrcRC
    = TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB, MRI);
  const TargetRegisterClass *DstRC
    = TRI.getRegClassForSizeOnBank(DstSize, *DstRB, MRI);

  if (SrcSize > 32) {
    int SubRegIdx = sizeToSubRegIndex(DstSize);
    if (SubRegIdx == -1)
      return false;

    // Deal with weird cases where the class only partially supports the subreg
    // index.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
    if (!SrcRC)
      return false;

    I.getOperand(1).setSubReg(SubRegIdx);
  }

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
    return false;
  }

  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT;
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  const LLT S1 = LLT::scalar(1);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const unsigned DstSize = DstTy.getSizeInBits();
  if (!DstTy.isScalar())
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, TRI);

  if (SrcBank->getID() == AMDGPU::SCCRegBankID) {
    if (SrcTy != S1 || DstSize > 64) // Invalid
      return false;

    unsigned Opcode =
        DstSize > 32 ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
    const TargetRegisterClass *DstRC =
        DstSize > 32 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;

    // FIXME: Create an extra copy to avoid incorrectly constraining the result
    // of the scc producer.
    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), TmpReg)
      .addReg(SrcReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(TmpReg);

    // The instruction operands are backwards from what you would expect.
    BuildMI(MBB, I, DL, TII.get(Opcode), DstReg)
      .addImm(0)
      .addImm(Signed ? -1 : 1);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  }

  if (SrcBank->getID() == AMDGPU::VCCRegBankID && DstSize <= 32) {
    if (SrcTy != S1) // Invalid
      return false;

    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
      .addImm(0)               // src0_modifiers
      .addImm(0)               // src0
      .addImm(0)               // src1_modifiers
      .addImm(Signed ? -1 : 1) // src1
      .addUse(SrcReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (I.getOpcode() == AMDGPU::G_ANYEXT)
    return selectCOPY(I);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // 64-bit should have been split up in RegBankSelect

    // Try to use an and with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
        .addImm(Mask)
        .addReg(SrcReg);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32 : AMDGPU::V_BFE_U32;
    MachineInstr *ExtI =
      BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
      .addReg(SrcReg)
      .addImm(0) // Offset
      .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
        .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && SrcSize <= 32) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);

      BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
        .addReg(ExtReg)
        .addImm(SrcSize << 16);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
    }

    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Mask);
    } else {
      BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
        .addReg(SrcReg)
        .addImm(SrcSize << 16);
    }

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, MRI);
  }

  return false;
}

bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &ImmOp = I.getOperand(1);

  // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
  if (ImmOp.isFPImm()) {
    const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
    ImmOp.ChangeToImmediate(Imm.getZExtValue());
  } else if (ImmOp.isCImm()) {
    ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
  }

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size;
  bool IsSgpr;
  const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
  if (RB) {
    IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
    Size = MRI.getType(DstReg).getSizeInBits();
  } else {
    const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
    IsSgpr = TRI.isSGPRClass(RC);
    Size = TRI.getRegSizeInBits(*RC);
  }

  if (Size != 32 && Size != 64)
    return false;

  unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (Size == 32) {
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  DebugLoc DL = I.getDebugLoc();
  const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
                                           &AMDGPU::VGPR_32RegClass;
  Register LoReg = MRI.createVirtualRegister(RC);
  Register HiReg = MRI.createVirtualRegister(RC);
  const APInt &Imm = APInt(Size, I.getOperand(1).getImm());

  BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
          .addImm(Imm.trunc(32).getZExtValue());

  BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
          .addImm(Imm.ashr(32).getZExtValue());

  const MachineInstr *RS =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
              .addReg(LoReg)
              .addImm(AMDGPU::sub0)
              .addReg(HiReg)
              .addImm(AMDGPU::sub1);

  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
  I.eraseFromParent();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
  if (!DstRC)
    return true;
  return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
}

static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

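// Walk the chain of G_GEPs feeding \p Load, recording for each one its
// constant offset and which address components live in SGPRs vs. VGPRs.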
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

  assert(PtrMI);

  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
    return;

  GEPInfo GEPInfo(*PtrMI);

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
    assert(OpDef);
    if (i == 2 && isConstant(*OpDef)) {
      // TODO: Could handle constant base + variable offset, but a combine
      // probably should have commuted it.
      assert(GEPInfo.Imm == 0);
      GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
      continue;
    }
    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }

  AddrInfo.push_back(GEPInfo);
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI.memoperands_begin();
  const Value *Ptr = MMO->getValue();

  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
  unsigned AS = PtrTy.getAddressSpace();
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
      STI.ldsRequiresM0Init()) {
    // If DS instructions require M0 initialization, insert it before selecting.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_ATOMICRMW(MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
  // whether the branch is uniform when selecting the instruction. In
  // GlobalISel, we should push that decision into RegBankSelect. Assume for
  // now that RegBankSelect knows what it's doing if the branch condition is
  // scc, even though it currently does not.
  if (isSCC(CondReg, MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32_XM0RegClass;
  } else if (isVCC(CondReg, MRI)) {
    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
    // Based on the register bank, we sort of know that a VCC producer ands
    // inactive lanes with 0. What if there was a logical operation with vcc
    // producers in different blocks/with different exec masks?
    // FIXME: Should scc->vcc copies and with exec?
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  } else
    return false;

  if (!MRI.getRegClassOrNull(CondReg))
    MRI.setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
    .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
    .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
    DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, MRI);
}

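// Select G_PTR_MASK by clearing the low bits of the pointer with an AND. For
// 64-bit pointers only the low 32 bits are masked, on the assumption that the
// alignment exponent is always less than 32.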
bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
  uint64_t Align = I.getOperand(2).getImm();
  const uint64_t Mask = ~((UINT64_C(1) << Align) - 1);

  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  unsigned MovOpc = IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  LLT Ty = MRI.getType(DstReg);

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB,
                                                                  MRI);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB,
                                                                  MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI))
    return false;

  const DebugLoc &DL = I.getDebugLoc();
  Register ImmReg = MRI.createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(MovOpc), ImmReg)
    .addImm(Mask);

  if (Ty.getSizeInBits() == 32) {
    BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
      .addReg(SrcReg)
      .addReg(ImmReg);
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI.createVirtualRegister(&RegRC);
  Register LoReg = MRI.createVirtualRegister(&RegRC);
  Register MaskLo = MRI.createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(SrcReg, 0, AMDGPU::sub1);

  BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskLo)
    .addReg(LoReg)
    .addReg(ImmReg);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(MaskLo)
    .addImm(AMDGPU::sub0)
    .addReg(HiReg)
    .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}

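// Top-level selection entry point: dispatch generic opcodes to the manual
// selectors above, falling back to the TableGen'erated selectImpl().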
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (I.isPHI())
    return selectPHI(I);

  if (!isPreISelGenericOpcode(I.getOpcode())) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectG_AND_OR_XOR(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_GEP:
    return selectG_GEP(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
    if (selectG_ICMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_FADD:
    return selectG_LOAD_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_STORE:
    return selectG_STORE(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_FRAME_INDEX:
    return selectG_FRAME_INDEX(I);
  case TargetOpcode::G_FENCE:
    // FIXME: Tablegen importer doesn't handle the imm operands correctly, and
    // is checking for G_CONSTANT
    I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE));
    return true;
  case TargetOpcode::G_PTR_MASK:
    return selectG_PTR_MASK(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

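// Look through G_FNEG and G_FABS to accumulate SISrcMods source-modifier
// bits, returning the underlying source register and the modifier mask.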
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(
  Register Src, const MachineRegisterInfo &MRI) const {
  unsigned Mods = 0;
  MachineInstr *MI = MRI.getVRegDef(Src);

  if (MI && MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = MRI.getVRegDef(Src);
  }

  if (MI && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::make_pair(Src, Mods);
}

///
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra tablegen pattern.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  MachineRegisterInfo &MRI
    = Root.getParent()->getParent()->getParent()->getRegInfo();

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const {
  // FIXME: Handle clamp and op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // clamp
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // FIXME: Handle op_sel
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods
  }};
}

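/// Match an SMRD addressing mode of the form SGPR base + immediate offset that
/// is legal for the subtarget's encoding. Illustrative MIR (register numbers
/// and the offset value are arbitrary):
///   %1:sgpr(s64) = G_CONSTANT i64 16
///   %2:sgpr(p4) = G_GEP %0, %1
///   %3:sgpr(s32) = G_LOAD %2
/// For the load above this renders (sbase = %0, offset = encoded 16).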
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];

  if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
    return None;

  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  unsigned PtrReg = GEPInfo.SgprParts[0];
  int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
  if (!isUInt<32>(EncodedImm))
    return None;

  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, MRI, AddrInfo);

  // FIXME: We should shrink the GEP if the offset is known to be <= 32 bits;
  // then we could select all ptr + 32-bit offsets, not just immediate offsets.
  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return None;

  const GEPInfo &GEPInfo = AddrInfo[0];
  if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
    return None;

  // If we make it this far, we have a load with a 32-bit immediate offset. It
  // is OK to select it using an sgpr offset, because we have already failed to
  // select it as one of the _IMM variants; the _IMM patterns are considered
  // before the _SGPR patterns.
  unsigned PtrReg = GEPInfo.SgprParts[0];
  Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
          .addImm(GEPInfo.Imm);
  return {{
    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
    [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
  }};
}

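/// Fold a constant offset from a G_GEP into the flat instruction's immediate
/// offset field when the subtarget supports flat offsets; otherwise render the
/// raw pointer with a zero offset. A sketch (illustrative MIR; assumes 40 is a
/// legal flat offset for the access's address space):
///   %1:vgpr(s64) = G_CONSTANT i64 40
///   %2:vgpr(p1) = G_GEP %0, %1
/// Selecting %2 renders (vaddr = %0, offset = 40, slc = 0).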
template <bool Signed>
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  InstructionSelector::ComplexRendererFns Default = {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};

  if (!STI.hasFlatInstOffsets())
    return Default;

  const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg());
  if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP)
    return Default;

  Optional<int64_t> Offset =
    getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI);
  if (!Offset.hasValue())
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed))
    return Default;

  Register BasePtr = OpDef->getOperand(1).getReg();

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // slc
    }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  return selectFlatOffsetImpl<false>(Root);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
  return selectFlatOffsetImpl<true>(Root);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

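/// Select a MUBUF scratch access with a VGPR address ("offen"). A constant
/// address is split between a materialized vaddr and the 12-bit immediate
/// offset field. A worked example (the address value is arbitrary):
///   address = 0x11234
///   vaddr   = V_MOV_B32 (0x11234 & ~4095) = 0x11000
///   offset  = 0x11234 & 4095 = 0x234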
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) {
    Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    // TODO: Should this be inside the render function? The iterator seems to
    // move.
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
      .addImm(Offset & ~4095);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               const MachineMemOperand *MMO = *MI->memoperands_begin();
               const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

               Register SOffsetReg = isStackPtrRelative(PtrInfo)
                                         ? Info->getStackPtrOffsetReg()
                                         : Info->getScratchWaveOffsetReg();
               MIB.addReg(SOffsetReg);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & 4095);
             }}};
  }

  assert(Offset == 0);

  // Try to fold a frame index directly into the MUBUF vaddr field, and any
  // offsets.
  Optional<int> FI;
  Register VAddr = Root.getReg();
  if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) {
    if (isBaseWithConstantOffset(Root, MRI)) {
      const MachineOperand &LHS = RootDef->getOperand(1);
      const MachineOperand &RHS = RootDef->getOperand(2);
      const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
      const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
      if (LHSDef && RHSDef) {
        int64_t PossibleOffset =
            RHSDef->getOperand(1).getCImm()->getSExtValue();
        if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
            (!STI.privateMemoryResourceIsRangeChecked() ||
             KnownBits->signBitIsZero(LHS.getReg()))) {
          if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
            FI = LHSDef->getOperand(1).getIndex();
          else
            VAddr = LHS.getReg();
          Offset = PossibleOffset;
        }
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  // TODO: Should split large offsets that don't fit, as above.
  // TODO: Don't use scratch wave offset just because the offset didn't fit.
  Register SOffset = FI.hasValue() ? Info->getStackPtrOffsetReg()
                                   : Info->getScratchWaveOffsetReg();

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI.hasValue())
               MIB.addFrameIndex(FI.getValue());
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             MIB.addReg(SOffset);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}

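/// Check that a constant fits in the DS instruction offset field: 16 bits for
/// single-offset forms, 8 bits per slot for the two-offset forms (e.g.
/// DS_READ2_B32). For example, with OffsetBits == 16, offsets 0..65535 are
/// encodable; on subtargets without a usable DS offset, the base must
/// additionally be provably non-negative.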
bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
                                                const MachineOperand &Base,
                                                int64_t Offset,
                                                unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return KnownBits->signBitIsZero(Base.getReg());
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  int64_t Offset = 0;
  if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) ||
      !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return {};

  const MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
  const MachineMemOperand *MMO = *MI->memoperands_begin();
  const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();

  Register SOffsetReg = isStackPtrRelative(PtrInfo)
                            ? Info->getStackPtrOffsetReg()
                            : Info->getScratchWaveOffsetReg();
  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(Info->getScratchRSrcReg());
      },                                                         // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }      // offset
  }};
}

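/// Render a single-address DS operand pair (base, uimm16 offset), peeling a
/// constant offset off a base-plus-constant address when it fits the 16-bit
/// field. Illustrative MIR (register numbers and the constant are arbitrary):
///   %1:vgpr(s32) = G_CONSTANT i32 64
///   %2:vgpr(p3) = G_GEP %0, %1
/// Selecting %2 renders (base = %0, offset = 64); anything else falls back to
/// (base = addr, offset = 0).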
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
      }};
  }

  int64_t ConstAddr = 0;
  if (isBaseWithConstantOffset(Root, MRI)) {
    const MachineOperand &LHS = RootDef->getOperand(1);
    const MachineOperand &RHS = RootDef->getOperand(2);
    const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t PossibleOffset =
        RHSDef->getOperand(1).getCImm()->getSExtValue();
      if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
        // (add n0, c0)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
          }};
      }
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO: Handle subtracted bases.
  } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
    // TODO: Handle purely constant addresses.
  }

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
    }};
}