1 //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AMDGPU.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUInstructionSelector.h"
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 
34 #define DEBUG_TYPE "amdgpu-isel"
35 
36 using namespace llvm;
37 
38 #define GET_GLOBALISEL_IMPL
39 #define AMDGPUSubtarget GCNSubtarget
40 #include "AMDGPUGenGlobalISel.inc"
41 #undef GET_GLOBALISEL_IMPL
42 #undef AMDGPUSubtarget
43 
44 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
45     const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
46     const AMDGPUTargetMachine &TM)
47     : InstructionSelector(), TII(*STI.getInstrInfo()),
48       TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49       STI(STI),
50       EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
51 #define GET_GLOBALISEL_PREDICATES_INIT
52 #include "AMDGPUGenGlobalISel.inc"
53 #undef GET_GLOBALISEL_PREDICATES_INIT
54 #define GET_GLOBALISEL_TEMPORARIES_INIT
55 #include "AMDGPUGenGlobalISel.inc"
56 #undef GET_GLOBALISEL_TEMPORARIES_INIT
57 {
58 }
59 
60 const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
61 
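// Return true if \p Reg carries an SCC value: either the physical SCC
// register itself, a virtual register assigned to the SCC register bank, or
// a 1-bit value constrained to SReg_32_XM0 (the class used to model SCC).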
62 static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
63   if (Reg == AMDGPU::SCC)
64     return true;
65 
66   if (TargetRegisterInfo::isPhysicalRegister(Reg))
67     return false;
68 
69   auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
70   const TargetRegisterClass *RC =
71       RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
72   if (RC)
73     return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
74            MRI.getType(Reg).getSizeInBits() == 1;
75 
76   const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
77   return RB->getID() == AMDGPU::SCCRegBankID;
78 }
79 
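// Select a COPY (also used for G_BITCAST and G_INTTOPTR). A copy out of the
// scc register bank into a 64-bit destination is expanded to V_CMP_NE_U32 so
// the result lands in VCC; other copies keep the COPY opcode and only have
// their register operands constrained.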
80 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
81   MachineBasicBlock *BB = I.getParent();
82   MachineFunction *MF = BB->getParent();
83   MachineRegisterInfo &MRI = MF->getRegInfo();
84   I.setDesc(TII.get(TargetOpcode::COPY));
85 
  // Special case for COPY from the scc register bank.  The scc register bank
  // is modeled using 32-bit SGPRs.
88   const MachineOperand &Src = I.getOperand(1);
89   unsigned SrcReg = Src.getReg();
90   if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
    unsigned DstReg = I.getOperand(0).getReg();
    unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
93 
94     // We have a copy from a 32-bit to 64-bit register.  This happens
95     // when we are selecting scc->vcc copies.
96     if (DstSize == 64) {
97       const DebugLoc &DL = I.getDebugLoc();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
99         .addImm(0)
100         .addReg(SrcReg);
101       if (!MRI.getRegClassOrNull(SrcReg))
102         MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
103       I.eraseFromParent();
104       return true;
105     }
106   }
107 
108   for (const MachineOperand &MO : I.operands()) {
109     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
110       continue;
111 
112     const TargetRegisterClass *RC =
113             TRI.getConstrainedRegClassForOperand(MO, MRI);
114     if (!RC)
115       continue;
116     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
117   }
118   return true;
119 }
120 
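// Extract the 32-bit half selected by \p SubIdx (sub0 or sub1) from a 64-bit
// operand. Register operands are copied into a fresh SGPR_32 virtual
// register; immediate operands are split into their low/high 32 bits.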
121 MachineOperand
122 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
123                                            unsigned SubIdx) const {
124 
125   MachineInstr *MI = MO.getParent();
126   MachineBasicBlock *BB = MO.getParent()->getParent();
127   MachineFunction *MF = BB->getParent();
128   MachineRegisterInfo &MRI = MF->getRegInfo();
129   unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
130 
131   if (MO.isReg()) {
132     unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
133     unsigned Reg = MO.getReg();
134     BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
135             .addReg(Reg, 0, ComposedSubIdx);
136 
137     return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
138                                      MO.isKill(), MO.isDead(), MO.isUndef(),
139                                      MO.isEarlyClobber(), 0, MO.isDebug(),
140                                      MO.isInternalRead());
141   }
142 
143   assert(MO.isImm());
144 
145   APInt Imm(64, MO.getImm());
146 
147   switch (SubIdx) {
148   default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
150   case AMDGPU::sub0:
151     return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
152   case AMDGPU::sub1:
153     return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
154   }
155 }
156 
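// Return the immediate value held by a G_CONSTANT instruction.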
157 static int64_t getConstant(const MachineInstr *MI) {
158   return MI->getOperand(1).getCImm()->getSExtValue();
159 }
160 
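// Select a 64-bit G_ADD as an S_ADD_U32/S_ADDC_U32 pair on the 32-bit halves,
// recombined with a REG_SEQUENCE. Other sizes are rejected here.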
bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);

  if (Size != 64)
    return false;

  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
171 
172   DebugLoc DL = I.getDebugLoc();
173 
174   MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
175   MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
176 
177   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
178           .add(Lo1)
179           .add(Lo2);
180 
181   MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
182   MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
183 
184   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
185           .add(Hi1)
186           .add(Hi2);
187 
188   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
189           .addReg(DstLo)
190           .addImm(AMDGPU::sub0)
191           .addReg(DstHi)
192           .addImm(AMDGPU::sub1);
193 
194   for (MachineOperand &MO : I.explicit_operands()) {
195     if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
196       continue;
197     RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
198   }
199 
200   I.eraseFromParent();
201   return true;
202 }
203 
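// Select G_EXTRACT of a 32-bit-aligned slice as a subregister COPY.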
204 bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
205   MachineBasicBlock *BB = I.getParent();
206   MachineFunction *MF = BB->getParent();
207   MachineRegisterInfo &MRI = MF->getRegInfo();
208   assert(I.getOperand(2).getImm() % 32 == 0);
209   unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(2).getImm() / 32);
210   const DebugLoc &DL = I.getDebugLoc();
211   MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY),
212                                I.getOperand(0).getReg())
213                                .addReg(I.getOperand(1).getReg(), 0, SubReg);
214 
215   for (const MachineOperand &MO : Copy->operands()) {
216     const TargetRegisterClass *RC =
217             TRI.getConstrainedRegClassForOperand(MO, MRI);
218     if (!RC)
219       continue;
220     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
221   }
222   I.eraseFromParent();
223   return true;
224 }
225 
226 bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
227   return selectG_ADD(I);
228 }
229 
230 bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
231   MachineBasicBlock *BB = I.getParent();
232   MachineFunction *MF = BB->getParent();
233   MachineRegisterInfo &MRI = MF->getRegInfo();
234   const MachineOperand &MO = I.getOperand(0);
235   const TargetRegisterClass *RC =
236       TRI.getConstrainedRegClassForOperand(MO, MRI);
237   if (RC)
238     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
239   I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
240   return true;
241 }
242 
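// Select G_INSERT as an INSERT_SUBREG on the 32-bit channel containing the
// insert offset.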
243 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
244   MachineBasicBlock *BB = I.getParent();
245   MachineFunction *MF = BB->getParent();
246   MachineRegisterInfo &MRI = MF->getRegInfo();
247   unsigned SubReg = TRI.getSubRegFromChannel(I.getOperand(3).getImm() / 32);
248   DebugLoc DL = I.getDebugLoc();
249   MachineInstr *Ins = BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG))
250                                .addDef(I.getOperand(0).getReg())
251                                .addReg(I.getOperand(1).getReg())
252                                .addReg(I.getOperand(2).getReg())
253                                .addImm(SubReg);
254 
255   for (const MachineOperand &MO : Ins->operands()) {
256     if (!MO.isReg())
257       continue;
258     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
259       continue;
260 
261     const TargetRegisterClass *RC =
262             TRI.getConstrainedRegClassForOperand(MO, MRI);
263     if (!RC)
264       continue;
265     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
266   }
267   I.eraseFromParent();
268   return true;
269 }
270 
271 bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
272                                           CodeGenCoverage &CoverageInfo) const {
  unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID();
274   switch (IntrinsicID) {
275   default:
276     break;
277   case Intrinsic::maxnum:
278   case Intrinsic::minnum:
279   case Intrinsic::amdgcn_cvt_pkrtz:
280     return selectImpl(I, CoverageInfo);
281 
282   case Intrinsic::amdgcn_kernarg_segment_ptr: {
283     MachineFunction *MF = I.getParent()->getParent();
284     MachineRegisterInfo &MRI = MF->getRegInfo();
285     const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
286     const ArgDescriptor *InputPtrReg;
287     const TargetRegisterClass *RC;
288     const DebugLoc &DL = I.getDebugLoc();
289 
290     std::tie(InputPtrReg, RC)
291       = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
292     if (!InputPtrReg)
293       report_fatal_error("missing kernarg segment ptr");
294 
295     BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
296       .add(I.getOperand(0))
297       .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
298     I.eraseFromParent();
299     return true;
300   }
301   }
302   return false;
303 }
304 
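// Map an integer predicate to the corresponding V_CMP_*_e64 opcode for 32-bit
// or 64-bit operands.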
305 static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
306   assert(Size == 32 || Size == 64);
307   switch (P) {
308   default:
309     llvm_unreachable("Unknown condition code!");
310   case CmpInst::ICMP_NE:
311     return Size == 32 ? AMDGPU::V_CMP_NE_U32_e64 : AMDGPU::V_CMP_NE_U64_e64;
312   case CmpInst::ICMP_EQ:
313     return Size == 32 ? AMDGPU::V_CMP_EQ_U32_e64 : AMDGPU::V_CMP_EQ_U64_e64;
314   case CmpInst::ICMP_SGT:
315     return Size == 32 ? AMDGPU::V_CMP_GT_I32_e64 : AMDGPU::V_CMP_GT_I64_e64;
316   case CmpInst::ICMP_SGE:
317     return Size == 32 ? AMDGPU::V_CMP_GE_I32_e64 : AMDGPU::V_CMP_GE_I64_e64;
318   case CmpInst::ICMP_SLT:
319     return Size == 32 ? AMDGPU::V_CMP_LT_I32_e64 : AMDGPU::V_CMP_LT_I64_e64;
320   case CmpInst::ICMP_SLE:
321     return Size == 32 ? AMDGPU::V_CMP_LE_I32_e64 : AMDGPU::V_CMP_LE_I64_e64;
322   case CmpInst::ICMP_UGT:
323     return Size == 32 ? AMDGPU::V_CMP_GT_U32_e64 : AMDGPU::V_CMP_GT_U64_e64;
324   case CmpInst::ICMP_UGE:
325     return Size == 32 ? AMDGPU::V_CMP_GE_U32_e64 : AMDGPU::V_CMP_GE_U64_e64;
326   case CmpInst::ICMP_ULT:
327     return Size == 32 ? AMDGPU::V_CMP_LT_U32_e64 : AMDGPU::V_CMP_LT_U64_e64;
328   case CmpInst::ICMP_ULE:
329     return Size == 32 ? AMDGPU::V_CMP_LE_U32_e64 : AMDGPU::V_CMP_LE_U64_e64;
330   }
331 }
332 
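// Map an integer predicate to the corresponding S_CMP_* opcode. Only 32-bit
// operands are handled.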
333 static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
335   assert(Size == 32);
336   switch (P) {
337   default:
338     llvm_unreachable("Unknown condition code!");
339   case CmpInst::ICMP_NE:
340     return AMDGPU::S_CMP_LG_U32;
341   case CmpInst::ICMP_EQ:
342     return AMDGPU::S_CMP_EQ_U32;
343   case CmpInst::ICMP_SGT:
344     return AMDGPU::S_CMP_GT_I32;
345   case CmpInst::ICMP_SGE:
346     return AMDGPU::S_CMP_GE_I32;
347   case CmpInst::ICMP_SLT:
348     return AMDGPU::S_CMP_LT_I32;
349   case CmpInst::ICMP_SLE:
350     return AMDGPU::S_CMP_LE_I32;
351   case CmpInst::ICMP_UGT:
352     return AMDGPU::S_CMP_GT_U32;
353   case CmpInst::ICMP_UGE:
354     return AMDGPU::S_CMP_GE_U32;
355   case CmpInst::ICMP_ULT:
356     return AMDGPU::S_CMP_LT_U32;
357   case CmpInst::ICMP_ULE:
358     return AMDGPU::S_CMP_LE_U32;
359   }
360 }
361 
362 bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
363   MachineBasicBlock *BB = I.getParent();
364   MachineFunction *MF = BB->getParent();
365   MachineRegisterInfo &MRI = MF->getRegInfo();
366   DebugLoc DL = I.getDebugLoc();
367 
368   unsigned SrcReg = I.getOperand(2).getReg();
369   unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
370   // FIXME: VI supports 64-bit compares.
371   assert(Size == 32);
372 
373   unsigned CCReg = I.getOperand(0).getReg();
374   if (isSCC(CCReg, MRI)) {
375     unsigned Opcode = getS_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
376     MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
377             .add(I.getOperand(2))
378             .add(I.getOperand(3));
379     MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
380             .addReg(AMDGPU::SCC);
381     bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) |
382                constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
383     I.eraseFromParent();
384     return Ret;
385   }
386 
387   assert(Size == 32 || Size == 64);
388   unsigned Opcode = getV_CMPOpcode((CmpInst::Predicate)I.getOperand(1).getPredicate(), Size);
389   MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
390             I.getOperand(0).getReg())
391             .add(I.getOperand(2))
392             .add(I.getOperand(3));
393   RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
394                                AMDGPU::SReg_64RegClass, MRI);
395   bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
396   I.eraseFromParent();
397   return Ret;
398 }
399 
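// Build an EXP (or EXP_DONE when \p Done is set) instruction exporting
// \p Reg0..\p Reg3 to export target \p Tgt with the given vm/compr/en bits.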
400 static MachineInstr *
401 buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
402          unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
403          unsigned VM, bool Compr, unsigned Enabled, bool Done) {
404   const DebugLoc &DL = Insert->getDebugLoc();
405   MachineBasicBlock &BB = *Insert->getParent();
406   unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
407   return BuildMI(BB, Insert, DL, TII.get(Opcode))
408           .addImm(Tgt)
409           .addReg(Reg0)
410           .addReg(Reg1)
411           .addReg(Reg2)
412           .addReg(Reg3)
413           .addImm(VM)
414           .addImm(Compr)
415           .addImm(Enabled);
416 }
417 
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I, CodeGenCoverage &CoverageInfo) const {
421   MachineBasicBlock *BB = I.getParent();
422   MachineFunction *MF = BB->getParent();
423   MachineRegisterInfo &MRI = MF->getRegInfo();
424 
425   unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
426   switch (IntrinsicID) {
427   case Intrinsic::amdgcn_exp: {
428     int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
429     int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
430     int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
431     int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
432 
433     MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
434                                  I.getOperand(4).getReg(),
435                                  I.getOperand(5).getReg(),
436                                  I.getOperand(6).getReg(),
437                                  VM, false, Enabled, Done);
438 
439     I.eraseFromParent();
440     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
441   }
442   case Intrinsic::amdgcn_exp_compr: {
443     const DebugLoc &DL = I.getDebugLoc();
444     int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
445     int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
446     unsigned Reg0 = I.getOperand(3).getReg();
447     unsigned Reg1 = I.getOperand(4).getReg();
448     unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
449     int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
450     int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
451 
452     BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
453     MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
454                                  true,  Enabled, Done);
455 
456     I.eraseFromParent();
457     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
458   }
459   }
460   return false;
461 }
462 
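// Select G_SELECT. A condition in the scc register bank is handled with
// S_CSELECT_B32/B64; otherwise a V_CNDMASK_B32 is emitted (32-bit only).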
463 bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
464   MachineBasicBlock *BB = I.getParent();
465   MachineFunction *MF = BB->getParent();
466   MachineRegisterInfo &MRI = MF->getRegInfo();
467   const DebugLoc &DL = I.getDebugLoc();
468 
469   unsigned DstReg = I.getOperand(0).getReg();
470   unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
471   assert(Size == 32 || Size == 64);
472   const MachineOperand &CCOp = I.getOperand(1);
473   unsigned CCReg = CCOp.getReg();
474   if (isSCC(CCReg, MRI)) {
475     unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32 :
476                                          AMDGPU::S_CSELECT_B64;
477     MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
478             .addReg(CCReg);
479 
    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class we use to
    // model it.  Manually set the register class here instead.
483     if (!MRI.getRegClassOrNull(CCReg))
484         MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));
485     MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
486             .add(I.getOperand(2))
487             .add(I.getOperand(3));
488 
489     bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
490                constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
491     I.eraseFromParent();
492     return Ret;
493   }
494 
495   assert(Size == 32);
496   // FIXME: Support 64-bit select
497   MachineInstr *Select =
498       BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
499               .addImm(0)
500               .add(I.getOperand(3))
501               .addImm(0)
502               .add(I.getOperand(2))
503               .add(I.getOperand(1));
504 
505   bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
506   I.eraseFromParent();
507   return Ret;
508 }
509 
510 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
511   MachineBasicBlock *BB = I.getParent();
512   MachineFunction *MF = BB->getParent();
513   MachineRegisterInfo &MRI = MF->getRegInfo();
514   DebugLoc DL = I.getDebugLoc();
515   unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
516   unsigned Opcode;
517 
518   // FIXME: Select store instruction based on address space
519   switch (StoreSize) {
520   default:
521     return false;
522   case 32:
523     Opcode = AMDGPU::FLAT_STORE_DWORD;
524     break;
525   case 64:
526     Opcode = AMDGPU::FLAT_STORE_DWORDX2;
527     break;
528   case 96:
529     Opcode = AMDGPU::FLAT_STORE_DWORDX3;
530     break;
531   case 128:
532     Opcode = AMDGPU::FLAT_STORE_DWORDX4;
533     break;
534   }
535 
536   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
537           .add(I.getOperand(1))
538           .add(I.getOperand(0))
539           .addImm(0)  // offset
540           .addImm(0)  // glc
541           .addImm(0)  // slc
542           .addImm(0); // dlc
543 
545   // Now that we selected an opcode, we need to constrain the register
546   // operands to use appropriate classes.
547   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
548 
549   I.eraseFromParent();
550   return Ret;
551 }
552 
553 bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
554   MachineBasicBlock *BB = I.getParent();
555   MachineFunction *MF = BB->getParent();
556   MachineRegisterInfo &MRI = MF->getRegInfo();
557   MachineOperand &ImmOp = I.getOperand(1);
558 
559   // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
560   if (ImmOp.isFPImm()) {
561     const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
562     ImmOp.ChangeToImmediate(Imm.getZExtValue());
563   } else if (ImmOp.isCImm()) {
564     ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
565   }
566 
567   unsigned DstReg = I.getOperand(0).getReg();
568   unsigned Size;
569   bool IsSgpr;
570   const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
571   if (RB) {
572     IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
573     Size = MRI.getType(DstReg).getSizeInBits();
574   } else {
575     const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
576     IsSgpr = TRI.isSGPRClass(RC);
577     Size = TRI.getRegSizeInBits(*RC);
578   }
579 
580   if (Size != 32 && Size != 64)
581     return false;
582 
583   unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
584   if (Size == 32) {
585     I.setDesc(TII.get(Opcode));
586     I.addImplicitDefUseOperands(*MF);
587     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
588   }
589 
590   DebugLoc DL = I.getDebugLoc();
591   const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
592                                            &AMDGPU::VGPR_32RegClass;
593   unsigned LoReg = MRI.createVirtualRegister(RC);
594   unsigned HiReg = MRI.createVirtualRegister(RC);
595   const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
596 
597   BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
598           .addImm(Imm.trunc(32).getZExtValue());
599 
600   BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
601           .addImm(Imm.ashr(32).getZExtValue());
602 
603   const MachineInstr *RS =
604       BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
605               .addReg(LoReg)
606               .addImm(AMDGPU::sub0)
607               .addReg(HiReg)
608               .addImm(AMDGPU::sub1);
609 
  // We can't call constrainSelectedInstRegOperands here, because it doesn't
  // work for target-independent opcodes.
612   I.eraseFromParent();
613   const TargetRegisterClass *DstRC =
614       TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
615   if (!DstRC)
616     return true;
617   return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
618 }
619 
620 static bool isConstant(const MachineInstr &MI) {
621   return MI.getOpcode() == TargetOpcode::G_CONSTANT;
622 }
623 
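// Walk the chain of G_GEPs feeding \p Load's address operand, recording for
// each one its constant offset and which address components come from SGPRs
// vs. VGPRs. The results feed the SMRD addressing-mode selectors below.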
624 void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
625     const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
626 
627   const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
628 
629   assert(PtrMI);
630 
631   if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
632     return;
633 
634   GEPInfo GEPInfo(*PtrMI);
635 
636   for (unsigned i = 1, e = 3; i < e; ++i) {
637     const MachineOperand &GEPOp = PtrMI->getOperand(i);
638     const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
639     assert(OpDef);
640     if (isConstant(*OpDef)) {
641       // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
642       // are lacking other optimizations.
643       assert(GEPInfo.Imm == 0);
644       GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
645       continue;
646     }
647     const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
648     if (OpBank->getID() == AMDGPU::SGPRRegBankID)
649       GEPInfo.SgprParts.push_back(GEPOp.getReg());
650     else
651       GEPInfo.VgprParts.push_back(GEPOp.getReg());
652   }
653 
654   AddrInfo.push_back(GEPInfo);
655   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
656 }
657 
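// Return true if the access's single memory operand has a pointer known to be
// uniform: a kernel argument, a constant or global, a pseudo source value,
// the 32-bit constant address space, or a pointer marked !amdgpu.uniform.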
658 bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
659   if (!MI.hasOneMemOperand())
660     return false;
661 
662   const MachineMemOperand *MMO = *MI.memoperands_begin();
663   const Value *Ptr = MMO->getValue();
664 
665   // UndefValue means this is a load of a kernel input.  These are uniform.
666   // Sometimes LDS instructions have constant pointers.
667   // If Ptr is null, then that means this mem operand contains a
668   // PseudoSourceValue like GOT.
669   if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
670       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
671     return true;
672 
673   if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
674     return true;
675 
676   const Instruction *I = dyn_cast<Instruction>(Ptr);
677   return I && I->getMetadata("amdgpu.uniform");
678 }
679 
680 bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
681   for (const GEPInfo &GEPInfo : AddrInfo) {
682     if (!GEPInfo.VgprParts.empty())
683       return true;
684   }
685   return false;
686 }
687 
688 bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
689   MachineBasicBlock *BB = I.getParent();
690   MachineFunction *MF = BB->getParent();
691   MachineRegisterInfo &MRI = MF->getRegInfo();
692   DebugLoc DL = I.getDebugLoc();
693   unsigned DstReg = I.getOperand(0).getReg();
694   unsigned PtrReg = I.getOperand(1).getReg();
695   unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
696   unsigned Opcode;
697 
698   SmallVector<GEPInfo, 4> AddrInfo;
699 
700   getAddrModeInfo(I, MRI, AddrInfo);
701 
702   switch (LoadSize) {
703   default:
    llvm_unreachable("Load size not supported");
705   case 32:
706     Opcode = AMDGPU::FLAT_LOAD_DWORD;
707     break;
708   case 64:
709     Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
710     break;
711   }
712 
713   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
714                                .add(I.getOperand(0))
715                                .addReg(PtrReg)
716                                .addImm(0)  // offset
717                                .addImm(0)  // glc
718                                .addImm(0)  // slc
719                                .addImm(0); // dlc
720 
721   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
722   I.eraseFromParent();
723   return Ret;
724 }
725 
726 bool AMDGPUInstructionSelector::select(MachineInstr &I,
727                                        CodeGenCoverage &CoverageInfo) const {
728 
729   if (!isPreISelGenericOpcode(I.getOpcode())) {
730     if (I.isCopy())
731       return selectCOPY(I);
732     return true;
733   }
734 
735   switch (I.getOpcode()) {
736   default:
737     return selectImpl(I, CoverageInfo);
738   case TargetOpcode::G_ADD:
739     return selectG_ADD(I);
740   case TargetOpcode::G_INTTOPTR:
741   case TargetOpcode::G_BITCAST:
742     return selectCOPY(I);
743   case TargetOpcode::G_CONSTANT:
744   case TargetOpcode::G_FCONSTANT:
745     return selectG_CONSTANT(I);
746   case TargetOpcode::G_EXTRACT:
747     return selectG_EXTRACT(I);
748   case TargetOpcode::G_GEP:
749     return selectG_GEP(I);
750   case TargetOpcode::G_IMPLICIT_DEF:
751     return selectG_IMPLICIT_DEF(I);
752   case TargetOpcode::G_INSERT:
753     return selectG_INSERT(I);
754   case TargetOpcode::G_INTRINSIC:
755     return selectG_INTRINSIC(I, CoverageInfo);
756   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
757     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
758   case TargetOpcode::G_ICMP:
759     return selectG_ICMP(I);
760   case TargetOpcode::G_LOAD:
761     if (selectImpl(I, CoverageInfo))
762       return true;
763     return selectG_LOAD(I);
764   case TargetOpcode::G_SELECT:
765     return selectG_SELECT(I);
766   case TargetOpcode::G_STORE:
767     return selectG_STORE(I);
768   }
769   return false;
770 }
771 
772 InstructionSelector::ComplexRendererFns
773 AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}
779 
/// This will select either an SGPR or VGPR operand and will save us from
/// having to write an extra TableGen pattern.
783 InstructionSelector::ComplexRendererFns
784 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
785   return {{
786       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
787   }};
788 }
789 
790 InstructionSelector::ComplexRendererFns
791 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
792   return {{
793       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
794       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
795       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
796       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
797   }};
}

InstructionSelector::ComplexRendererFns
800 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
801   return {{
802       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
803       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
804       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
805   }};
806 }
807 
808 InstructionSelector::ComplexRendererFns
809 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
810   return {{
811       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
812       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
813   }};
814 }
815 
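// Complex pattern for SMRD loads: match a uniform base pointer plus a
// constant offset that fits in the encoded immediate offset field.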
816 InstructionSelector::ComplexRendererFns
817 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
818   MachineRegisterInfo &MRI =
819       Root.getParent()->getParent()->getParent()->getRegInfo();
820 
821   SmallVector<GEPInfo, 4> AddrInfo;
822   getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
823 
824   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
825     return None;
826 
827   const GEPInfo &GEPInfo = AddrInfo[0];
828 
829   if (!AMDGPU::isLegalSMRDImmOffset(STI, GEPInfo.Imm))
830     return None;
831 
832   unsigned PtrReg = GEPInfo.SgprParts[0];
833   int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
834   return {{
835     [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
836     [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
837   }};
838 }
839 
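// Like selectSmrdImm, but accepts any offset whose encoded form fits in a
// 32-bit literal.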
840 InstructionSelector::ComplexRendererFns
841 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
842   MachineRegisterInfo &MRI =
843       Root.getParent()->getParent()->getParent()->getRegInfo();
844 
845   SmallVector<GEPInfo, 4> AddrInfo;
846   getAddrModeInfo(*Root.getParent(), MRI, AddrInfo);
847 
848   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
849     return None;
850 
851   const GEPInfo &GEPInfo = AddrInfo[0];
852   unsigned PtrReg = GEPInfo.SgprParts[0];
853   int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm);
854   if (!isUInt<32>(EncodedImm))
855     return None;
856 
857   return {{
858     [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
859     [=](MachineInstrBuilder &MIB) { MIB.addImm(EncodedImm); }
860   }};
861 }
862 
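// Complex pattern for the _SGPR SMRD forms: the GEP's constant offset is
// materialized into an SGPR with S_MOV_B32 and used as the register offset
// operand.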
863 InstructionSelector::ComplexRendererFns
864 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
865   MachineInstr *MI = Root.getParent();
866   MachineBasicBlock *MBB = MI->getParent();
867   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
868 
869   SmallVector<GEPInfo, 4> AddrInfo;
870   getAddrModeInfo(*MI, MRI, AddrInfo);
871 
  // FIXME: We should shrink the GEP if the offset is known to be <= 32 bits,
  // then we can select all ptr + 32-bit offsets, not just immediate offsets.
874   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
875     return None;
876 
877   const GEPInfo &GEPInfo = AddrInfo[0];
878   if (!GEPInfo.Imm || !isUInt<32>(GEPInfo.Imm))
879     return None;
880 
  // If we make it this far we have a load with a 32-bit immediate offset.
  // It is OK to select this using an SGPR offset, because we have already
  // failed trying to select this load into one of the _IMM variants since
  // the _IMM patterns are considered before the _SGPR patterns.
885   unsigned PtrReg = GEPInfo.SgprParts[0];
886   unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
887   BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
888           .addImm(GEPInfo.Imm);
889   return {{
890     [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
891     [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }
892   }};
893 }
894