//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
147a7e6055SDimitry Andric 
157a7e6055SDimitry Andric #include "AMDGPUInstructionSelector.h"
167a7e6055SDimitry Andric #include "AMDGPUInstrInfo.h"
177a7e6055SDimitry Andric #include "AMDGPURegisterBankInfo.h"
187a7e6055SDimitry Andric #include "AMDGPURegisterInfo.h"
197a7e6055SDimitry Andric #include "AMDGPUSubtarget.h"
204ba319b5SDimitry Andric #include "AMDGPUTargetMachine.h"
214ba319b5SDimitry Andric #include "SIMachineFunctionInfo.h"
224ba319b5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
234ba319b5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
244ba319b5SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
254ba319b5SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
267a7e6055SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
277a7e6055SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
287a7e6055SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
297a7e6055SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
307a7e6055SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
317a7e6055SDimitry Andric #include "llvm/IR/Type.h"
327a7e6055SDimitry Andric #include "llvm/Support/Debug.h"
337a7e6055SDimitry Andric #include "llvm/Support/raw_ostream.h"
347a7e6055SDimitry Andric 
357a7e6055SDimitry Andric #define DEBUG_TYPE "amdgpu-isel"
367a7e6055SDimitry Andric 
377a7e6055SDimitry Andric using namespace llvm;
387a7e6055SDimitry Andric 
394ba319b5SDimitry Andric #define GET_GLOBALISEL_IMPL
404ba319b5SDimitry Andric #define AMDGPUSubtarget GCNSubtarget
414ba319b5SDimitry Andric #include "AMDGPUGenGlobalISel.inc"
424ba319b5SDimitry Andric #undef GET_GLOBALISEL_IMPL
434ba319b5SDimitry Andric #undef AMDGPUSubtarget
444ba319b5SDimitry Andric 
AMDGPUInstructionSelector(const GCNSubtarget & STI,const AMDGPURegisterBankInfo & RBI,const AMDGPUTargetMachine & TM)457a7e6055SDimitry Andric AMDGPUInstructionSelector::AMDGPUInstructionSelector(
464ba319b5SDimitry Andric     const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
474ba319b5SDimitry Andric     const AMDGPUTargetMachine &TM)
487a7e6055SDimitry Andric     : InstructionSelector(), TII(*STI.getInstrInfo()),
494ba319b5SDimitry Andric       TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
504ba319b5SDimitry Andric       STI(STI),
514ba319b5SDimitry Andric       EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
524ba319b5SDimitry Andric #define GET_GLOBALISEL_PREDICATES_INIT
534ba319b5SDimitry Andric #include "AMDGPUGenGlobalISel.inc"
544ba319b5SDimitry Andric #undef GET_GLOBALISEL_PREDICATES_INIT
554ba319b5SDimitry Andric #define GET_GLOBALISEL_TEMPORARIES_INIT
564ba319b5SDimitry Andric #include "AMDGPUGenGlobalISel.inc"
574ba319b5SDimitry Andric #undef GET_GLOBALISEL_TEMPORARIES_INIT
584ba319b5SDimitry Andric {
594ba319b5SDimitry Andric }
604ba319b5SDimitry Andric 
getName()614ba319b5SDimitry Andric const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
624ba319b5SDimitry Andric 
selectCOPY(MachineInstr & I) const634ba319b5SDimitry Andric bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
644ba319b5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
654ba319b5SDimitry Andric   MachineFunction *MF = BB->getParent();
664ba319b5SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
674ba319b5SDimitry Andric   I.setDesc(TII.get(TargetOpcode::COPY));
684ba319b5SDimitry Andric   for (const MachineOperand &MO : I.operands()) {
694ba319b5SDimitry Andric     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
704ba319b5SDimitry Andric       continue;
714ba319b5SDimitry Andric 
724ba319b5SDimitry Andric     const TargetRegisterClass *RC =
734ba319b5SDimitry Andric             TRI.getConstrainedRegClassForOperand(MO, MRI);
744ba319b5SDimitry Andric     if (!RC)
754ba319b5SDimitry Andric       continue;
764ba319b5SDimitry Andric     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
774ba319b5SDimitry Andric   }
784ba319b5SDimitry Andric   return true;
794ba319b5SDimitry Andric }
807a7e6055SDimitry Andric 
817a7e6055SDimitry Andric MachineOperand
getSubOperand64(MachineOperand & MO,unsigned SubIdx) const827a7e6055SDimitry Andric AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
837a7e6055SDimitry Andric                                            unsigned SubIdx) const {
847a7e6055SDimitry Andric 
857a7e6055SDimitry Andric   MachineInstr *MI = MO.getParent();
867a7e6055SDimitry Andric   MachineBasicBlock *BB = MO.getParent()->getParent();
877a7e6055SDimitry Andric   MachineFunction *MF = BB->getParent();
887a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
897a7e6055SDimitry Andric   unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
907a7e6055SDimitry Andric 
917a7e6055SDimitry Andric   if (MO.isReg()) {
927a7e6055SDimitry Andric     unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
937a7e6055SDimitry Andric     unsigned Reg = MO.getReg();
947a7e6055SDimitry Andric     BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
957a7e6055SDimitry Andric             .addReg(Reg, 0, ComposedSubIdx);
967a7e6055SDimitry Andric 
977a7e6055SDimitry Andric     return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
987a7e6055SDimitry Andric                                      MO.isKill(), MO.isDead(), MO.isUndef(),
997a7e6055SDimitry Andric                                      MO.isEarlyClobber(), 0, MO.isDebug(),
1007a7e6055SDimitry Andric                                      MO.isInternalRead());
1017a7e6055SDimitry Andric   }
1027a7e6055SDimitry Andric 
1037a7e6055SDimitry Andric   assert(MO.isImm());
1047a7e6055SDimitry Andric 
1057a7e6055SDimitry Andric   APInt Imm(64, MO.getImm());
1067a7e6055SDimitry Andric 
1077a7e6055SDimitry Andric   switch (SubIdx) {
1087a7e6055SDimitry Andric   default:
1097a7e6055SDimitry Andric     llvm_unreachable("do not know to split immediate with this sub index.");
1107a7e6055SDimitry Andric   case AMDGPU::sub0:
1117a7e6055SDimitry Andric     return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
1127a7e6055SDimitry Andric   case AMDGPU::sub1:
1137a7e6055SDimitry Andric     return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
1147a7e6055SDimitry Andric   }
1157a7e6055SDimitry Andric }
1167a7e6055SDimitry Andric 
getConstant(const MachineInstr * MI)1174ba319b5SDimitry Andric static int64_t getConstant(const MachineInstr *MI) {
1184ba319b5SDimitry Andric   return MI->getOperand(1).getCImm()->getSExtValue();
1194ba319b5SDimitry Andric }
1204ba319b5SDimitry Andric 
selectG_ADD(MachineInstr & I) const1217a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
1227a7e6055SDimitry Andric   MachineBasicBlock *BB = I.getParent();
1237a7e6055SDimitry Andric   MachineFunction *MF = BB->getParent();
1247a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
1257a7e6055SDimitry Andric   unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
1267a7e6055SDimitry Andric   unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
1277a7e6055SDimitry Andric   unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
1287a7e6055SDimitry Andric 
1297a7e6055SDimitry Andric   if (Size != 64)
1307a7e6055SDimitry Andric     return false;
1317a7e6055SDimitry Andric 
1327a7e6055SDimitry Andric   DebugLoc DL = I.getDebugLoc();
1337a7e6055SDimitry Andric 
1347a7e6055SDimitry Andric   MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
1357a7e6055SDimitry Andric   MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
1367a7e6055SDimitry Andric 
1377a7e6055SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
1387a7e6055SDimitry Andric           .add(Lo1)
1397a7e6055SDimitry Andric           .add(Lo2);
1407a7e6055SDimitry Andric 
1417a7e6055SDimitry Andric   MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
1427a7e6055SDimitry Andric   MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
1437a7e6055SDimitry Andric 
1447a7e6055SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
1457a7e6055SDimitry Andric           .add(Hi1)
1467a7e6055SDimitry Andric           .add(Hi2);
1477a7e6055SDimitry Andric 
1487a7e6055SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
1497a7e6055SDimitry Andric           .addReg(DstLo)
1507a7e6055SDimitry Andric           .addImm(AMDGPU::sub0)
1517a7e6055SDimitry Andric           .addReg(DstHi)
1527a7e6055SDimitry Andric           .addImm(AMDGPU::sub1);
1537a7e6055SDimitry Andric 
1547a7e6055SDimitry Andric   for (MachineOperand &MO : I.explicit_operands()) {
1557a7e6055SDimitry Andric     if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1567a7e6055SDimitry Andric       continue;
1577a7e6055SDimitry Andric     RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
1587a7e6055SDimitry Andric   }
1597a7e6055SDimitry Andric 
1607a7e6055SDimitry Andric   I.eraseFromParent();
1617a7e6055SDimitry Andric   return true;
1627a7e6055SDimitry Andric }
1637a7e6055SDimitry Andric 
selectG_GEP(MachineInstr & I) const1647a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
1657a7e6055SDimitry Andric   return selectG_ADD(I);
1667a7e6055SDimitry Andric }
1677a7e6055SDimitry Andric 
selectG_IMPLICIT_DEF(MachineInstr & I) const1684ba319b5SDimitry Andric bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
1694ba319b5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
1704ba319b5SDimitry Andric   MachineFunction *MF = BB->getParent();
1714ba319b5SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
1724ba319b5SDimitry Andric   const MachineOperand &MO = I.getOperand(0);
1734ba319b5SDimitry Andric   const TargetRegisterClass *RC =
1744ba319b5SDimitry Andric       TRI.getConstrainedRegClassForOperand(MO, MRI);
1754ba319b5SDimitry Andric   if (RC)
1764ba319b5SDimitry Andric     RBI.constrainGenericRegister(MO.getReg(), *RC, MRI);
1774ba319b5SDimitry Andric   I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
1784ba319b5SDimitry Andric   return true;
1794ba319b5SDimitry Andric }
1804ba319b5SDimitry Andric 
selectG_INTRINSIC(MachineInstr & I,CodeGenCoverage & CoverageInfo) const1814ba319b5SDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
1824ba319b5SDimitry Andric                                           CodeGenCoverage &CoverageInfo) const {
1834ba319b5SDimitry Andric   unsigned IntrinsicID =  I.getOperand(1).getIntrinsicID();
1844ba319b5SDimitry Andric 
1854ba319b5SDimitry Andric   switch (IntrinsicID) {
1864ba319b5SDimitry Andric   default:
1874ba319b5SDimitry Andric     break;
1884ba319b5SDimitry Andric   case Intrinsic::maxnum:
1894ba319b5SDimitry Andric   case Intrinsic::minnum:
1904ba319b5SDimitry Andric   case Intrinsic::amdgcn_cvt_pkrtz:
1914ba319b5SDimitry Andric     return selectImpl(I, CoverageInfo);
1924ba319b5SDimitry Andric 
1934ba319b5SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr: {
1944ba319b5SDimitry Andric     MachineFunction *MF = I.getParent()->getParent();
1954ba319b5SDimitry Andric     MachineRegisterInfo &MRI = MF->getRegInfo();
1964ba319b5SDimitry Andric     const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1974ba319b5SDimitry Andric     const ArgDescriptor *InputPtrReg;
1984ba319b5SDimitry Andric     const TargetRegisterClass *RC;
1994ba319b5SDimitry Andric     const DebugLoc &DL = I.getDebugLoc();
2004ba319b5SDimitry Andric 
2014ba319b5SDimitry Andric     std::tie(InputPtrReg, RC)
2024ba319b5SDimitry Andric       = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
2034ba319b5SDimitry Andric     if (!InputPtrReg)
2044ba319b5SDimitry Andric       report_fatal_error("missing kernarg segment ptr");
2054ba319b5SDimitry Andric 
2064ba319b5SDimitry Andric     BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
2074ba319b5SDimitry Andric       .add(I.getOperand(0))
2084ba319b5SDimitry Andric       .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
2094ba319b5SDimitry Andric     I.eraseFromParent();
2104ba319b5SDimitry Andric     return true;
2114ba319b5SDimitry Andric   }
2124ba319b5SDimitry Andric   }
2134ba319b5SDimitry Andric   return false;
2144ba319b5SDimitry Andric }
2154ba319b5SDimitry Andric 
2164ba319b5SDimitry Andric static MachineInstr *
buildEXP(const TargetInstrInfo & TII,MachineInstr * Insert,unsigned Tgt,unsigned Reg0,unsigned Reg1,unsigned Reg2,unsigned Reg3,unsigned VM,bool Compr,unsigned Enabled,bool Done)2174ba319b5SDimitry Andric buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
2184ba319b5SDimitry Andric          unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
2194ba319b5SDimitry Andric          unsigned VM, bool Compr, unsigned Enabled, bool Done) {
2204ba319b5SDimitry Andric   const DebugLoc &DL = Insert->getDebugLoc();
2214ba319b5SDimitry Andric   MachineBasicBlock &BB = *Insert->getParent();
2224ba319b5SDimitry Andric   unsigned Opcode = Done ? AMDGPU::EXP_DONE : AMDGPU::EXP;
2234ba319b5SDimitry Andric   return BuildMI(BB, Insert, DL, TII.get(Opcode))
2244ba319b5SDimitry Andric           .addImm(Tgt)
2254ba319b5SDimitry Andric           .addReg(Reg0)
2264ba319b5SDimitry Andric           .addReg(Reg1)
2274ba319b5SDimitry Andric           .addReg(Reg2)
2284ba319b5SDimitry Andric           .addReg(Reg3)
2294ba319b5SDimitry Andric           .addImm(VM)
2304ba319b5SDimitry Andric           .addImm(Compr)
2314ba319b5SDimitry Andric           .addImm(Enabled);
2324ba319b5SDimitry Andric }
2334ba319b5SDimitry Andric 
selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr & I,CodeGenCoverage & CoverageInfo) const2344ba319b5SDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2354ba319b5SDimitry Andric                                                  MachineInstr &I,
2364ba319b5SDimitry Andric 						 CodeGenCoverage &CoverageInfo) const {
2374ba319b5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
2384ba319b5SDimitry Andric   MachineFunction *MF = BB->getParent();
2394ba319b5SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
2404ba319b5SDimitry Andric 
2414ba319b5SDimitry Andric   unsigned IntrinsicID = I.getOperand(0).getIntrinsicID();
2424ba319b5SDimitry Andric   switch (IntrinsicID) {
2434ba319b5SDimitry Andric   case Intrinsic::amdgcn_exp: {
2444ba319b5SDimitry Andric     int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
2454ba319b5SDimitry Andric     int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
2464ba319b5SDimitry Andric     int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
2474ba319b5SDimitry Andric     int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
2484ba319b5SDimitry Andric 
2494ba319b5SDimitry Andric     MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
2504ba319b5SDimitry Andric                                  I.getOperand(4).getReg(),
2514ba319b5SDimitry Andric                                  I.getOperand(5).getReg(),
2524ba319b5SDimitry Andric                                  I.getOperand(6).getReg(),
2534ba319b5SDimitry Andric                                  VM, false, Enabled, Done);
2544ba319b5SDimitry Andric 
2554ba319b5SDimitry Andric     I.eraseFromParent();
2564ba319b5SDimitry Andric     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
2574ba319b5SDimitry Andric   }
2584ba319b5SDimitry Andric   case Intrinsic::amdgcn_exp_compr: {
2594ba319b5SDimitry Andric     const DebugLoc &DL = I.getDebugLoc();
2604ba319b5SDimitry Andric     int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
2614ba319b5SDimitry Andric     int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
2624ba319b5SDimitry Andric     unsigned Reg0 = I.getOperand(3).getReg();
2634ba319b5SDimitry Andric     unsigned Reg1 = I.getOperand(4).getReg();
2644ba319b5SDimitry Andric     unsigned Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2654ba319b5SDimitry Andric     int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
2664ba319b5SDimitry Andric     int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
2674ba319b5SDimitry Andric 
2684ba319b5SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
2694ba319b5SDimitry Andric     MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
2704ba319b5SDimitry Andric                                  true,  Enabled, Done);
2714ba319b5SDimitry Andric 
2724ba319b5SDimitry Andric     I.eraseFromParent();
2734ba319b5SDimitry Andric     return constrainSelectedInstRegOperands(*Exp, TII, TRI, RBI);
2744ba319b5SDimitry Andric   }
2754ba319b5SDimitry Andric   }
2764ba319b5SDimitry Andric   return false;
2774ba319b5SDimitry Andric }
2784ba319b5SDimitry Andric 
selectG_STORE(MachineInstr & I) const2797a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
2807a7e6055SDimitry Andric   MachineBasicBlock *BB = I.getParent();
2814ba319b5SDimitry Andric   MachineFunction *MF = BB->getParent();
2824ba319b5SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
2837a7e6055SDimitry Andric   DebugLoc DL = I.getDebugLoc();
2844ba319b5SDimitry Andric   unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
2854ba319b5SDimitry Andric   unsigned Opcode;
2867a7e6055SDimitry Andric 
2877a7e6055SDimitry Andric   // FIXME: Select store instruction based on address space
2884ba319b5SDimitry Andric   switch (StoreSize) {
2894ba319b5SDimitry Andric   default:
2904ba319b5SDimitry Andric     return false;
2914ba319b5SDimitry Andric   case 32:
2924ba319b5SDimitry Andric     Opcode = AMDGPU::FLAT_STORE_DWORD;
2934ba319b5SDimitry Andric     break;
2944ba319b5SDimitry Andric   case 64:
2954ba319b5SDimitry Andric     Opcode = AMDGPU::FLAT_STORE_DWORDX2;
2964ba319b5SDimitry Andric     break;
2974ba319b5SDimitry Andric   case 96:
2984ba319b5SDimitry Andric     Opcode = AMDGPU::FLAT_STORE_DWORDX3;
2994ba319b5SDimitry Andric     break;
3004ba319b5SDimitry Andric   case 128:
3014ba319b5SDimitry Andric     Opcode = AMDGPU::FLAT_STORE_DWORDX4;
3024ba319b5SDimitry Andric     break;
3034ba319b5SDimitry Andric   }
3044ba319b5SDimitry Andric 
3054ba319b5SDimitry Andric   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
3067a7e6055SDimitry Andric           .add(I.getOperand(1))
3077a7e6055SDimitry Andric           .add(I.getOperand(0))
30824d58133SDimitry Andric           .addImm(0)  // offset
30924d58133SDimitry Andric           .addImm(0)  // glc
31024d58133SDimitry Andric           .addImm(0); // slc
3117a7e6055SDimitry Andric 
3125517e702SDimitry Andric 
3137a7e6055SDimitry Andric   // Now that we selected an opcode, we need to constrain the register
3147a7e6055SDimitry Andric   // operands to use appropriate classes.
3157a7e6055SDimitry Andric   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
3167a7e6055SDimitry Andric 
3177a7e6055SDimitry Andric   I.eraseFromParent();
3187a7e6055SDimitry Andric   return Ret;
3197a7e6055SDimitry Andric }
3207a7e6055SDimitry Andric 
selectG_CONSTANT(MachineInstr & I) const3217a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
3227a7e6055SDimitry Andric   MachineBasicBlock *BB = I.getParent();
3237a7e6055SDimitry Andric   MachineFunction *MF = BB->getParent();
3247a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
3254ba319b5SDimitry Andric   MachineOperand &ImmOp = I.getOperand(1);
3267a7e6055SDimitry Andric 
3274ba319b5SDimitry Andric   // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
3284ba319b5SDimitry Andric   if (ImmOp.isFPImm()) {
3294ba319b5SDimitry Andric     const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
3304ba319b5SDimitry Andric     ImmOp.ChangeToImmediate(Imm.getZExtValue());
3314ba319b5SDimitry Andric   } else if (ImmOp.isCImm()) {
3324ba319b5SDimitry Andric     ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue());
3334ba319b5SDimitry Andric   }
3344ba319b5SDimitry Andric 
3354ba319b5SDimitry Andric   unsigned DstReg = I.getOperand(0).getReg();
3364ba319b5SDimitry Andric   unsigned Size;
3374ba319b5SDimitry Andric   bool IsSgpr;
3384ba319b5SDimitry Andric   const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg());
3394ba319b5SDimitry Andric   if (RB) {
3404ba319b5SDimitry Andric     IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID;
3414ba319b5SDimitry Andric     Size = MRI.getType(DstReg).getSizeInBits();
3424ba319b5SDimitry Andric   } else {
3434ba319b5SDimitry Andric     const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg);
3444ba319b5SDimitry Andric     IsSgpr = TRI.isSGPRClass(RC);
3454ba319b5SDimitry Andric     Size = TRI.getRegSizeInBits(*RC);
3464ba319b5SDimitry Andric   }
3474ba319b5SDimitry Andric 
3484ba319b5SDimitry Andric   if (Size != 32 && Size != 64)
3494ba319b5SDimitry Andric     return false;
3504ba319b5SDimitry Andric 
3514ba319b5SDimitry Andric   unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
3527a7e6055SDimitry Andric   if (Size == 32) {
3534ba319b5SDimitry Andric     I.setDesc(TII.get(Opcode));
3544ba319b5SDimitry Andric     I.addImplicitDefUseOperands(*MF);
3557a7e6055SDimitry Andric     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3567a7e6055SDimitry Andric   }
3577a7e6055SDimitry Andric 
3587a7e6055SDimitry Andric   DebugLoc DL = I.getDebugLoc();
3594ba319b5SDimitry Andric   const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
3604ba319b5SDimitry Andric                                            &AMDGPU::VGPR_32RegClass;
3614ba319b5SDimitry Andric   unsigned LoReg = MRI.createVirtualRegister(RC);
3624ba319b5SDimitry Andric   unsigned HiReg = MRI.createVirtualRegister(RC);
3634ba319b5SDimitry Andric   const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
3647a7e6055SDimitry Andric 
3654ba319b5SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
3667a7e6055SDimitry Andric           .addImm(Imm.trunc(32).getZExtValue());
3677a7e6055SDimitry Andric 
3684ba319b5SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
3697a7e6055SDimitry Andric           .addImm(Imm.ashr(32).getZExtValue());
3707a7e6055SDimitry Andric 
3714ba319b5SDimitry Andric   const MachineInstr *RS =
3727a7e6055SDimitry Andric       BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3737a7e6055SDimitry Andric               .addReg(LoReg)
3747a7e6055SDimitry Andric               .addImm(AMDGPU::sub0)
3757a7e6055SDimitry Andric               .addReg(HiReg)
3767a7e6055SDimitry Andric               .addImm(AMDGPU::sub1);
3774ba319b5SDimitry Andric 
3787a7e6055SDimitry Andric   // We can't call constrainSelectedInstRegOperands here, because it doesn't
3797a7e6055SDimitry Andric   // work for target independent opcodes
3807a7e6055SDimitry Andric   I.eraseFromParent();
3814ba319b5SDimitry Andric   const TargetRegisterClass *DstRC =
3824ba319b5SDimitry Andric       TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI);
3834ba319b5SDimitry Andric   if (!DstRC)
3844ba319b5SDimitry Andric     return true;
3854ba319b5SDimitry Andric   return RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3867a7e6055SDimitry Andric }
3877a7e6055SDimitry Andric 
isConstant(const MachineInstr & MI)3887a7e6055SDimitry Andric static bool isConstant(const MachineInstr &MI) {
3897a7e6055SDimitry Andric   return MI.getOpcode() == TargetOpcode::G_CONSTANT;
3907a7e6055SDimitry Andric }
3917a7e6055SDimitry Andric 
getAddrModeInfo(const MachineInstr & Load,const MachineRegisterInfo & MRI,SmallVectorImpl<GEPInfo> & AddrInfo) const3927a7e6055SDimitry Andric void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
3937a7e6055SDimitry Andric     const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
3947a7e6055SDimitry Andric 
3957a7e6055SDimitry Andric   const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
3967a7e6055SDimitry Andric 
3977a7e6055SDimitry Andric   assert(PtrMI);
3987a7e6055SDimitry Andric 
3997a7e6055SDimitry Andric   if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
4007a7e6055SDimitry Andric     return;
4017a7e6055SDimitry Andric 
4027a7e6055SDimitry Andric   GEPInfo GEPInfo(*PtrMI);
4037a7e6055SDimitry Andric 
4047a7e6055SDimitry Andric   for (unsigned i = 1, e = 3; i < e; ++i) {
4057a7e6055SDimitry Andric     const MachineOperand &GEPOp = PtrMI->getOperand(i);
4067a7e6055SDimitry Andric     const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
4077a7e6055SDimitry Andric     assert(OpDef);
4087a7e6055SDimitry Andric     if (isConstant(*OpDef)) {
4097a7e6055SDimitry Andric       // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
4107a7e6055SDimitry Andric       // are lacking other optimizations.
4117a7e6055SDimitry Andric       assert(GEPInfo.Imm == 0);
4127a7e6055SDimitry Andric       GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
4137a7e6055SDimitry Andric       continue;
4147a7e6055SDimitry Andric     }
4157a7e6055SDimitry Andric     const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
4167a7e6055SDimitry Andric     if (OpBank->getID() == AMDGPU::SGPRRegBankID)
4177a7e6055SDimitry Andric       GEPInfo.SgprParts.push_back(GEPOp.getReg());
4187a7e6055SDimitry Andric     else
4197a7e6055SDimitry Andric       GEPInfo.VgprParts.push_back(GEPOp.getReg());
4207a7e6055SDimitry Andric   }
4217a7e6055SDimitry Andric 
4227a7e6055SDimitry Andric   AddrInfo.push_back(GEPInfo);
4237a7e6055SDimitry Andric   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
4247a7e6055SDimitry Andric }
4257a7e6055SDimitry Andric 
isInstrUniform(const MachineInstr & MI)4267a7e6055SDimitry Andric static bool isInstrUniform(const MachineInstr &MI) {
4277a7e6055SDimitry Andric   if (!MI.hasOneMemOperand())
4287a7e6055SDimitry Andric     return false;
4297a7e6055SDimitry Andric 
4307a7e6055SDimitry Andric   const MachineMemOperand *MMO = *MI.memoperands_begin();
4317a7e6055SDimitry Andric   const Value *Ptr = MMO->getValue();
4327a7e6055SDimitry Andric 
4337a7e6055SDimitry Andric   // UndefValue means this is a load of a kernel input.  These are uniform.
4347a7e6055SDimitry Andric   // Sometimes LDS instructions have constant pointers.
4357a7e6055SDimitry Andric   // If Ptr is null, then that means this mem operand contains a
4367a7e6055SDimitry Andric   // PseudoSourceValue like GOT.
4377a7e6055SDimitry Andric   if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
4387a7e6055SDimitry Andric       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
4397a7e6055SDimitry Andric     return true;
4407a7e6055SDimitry Andric 
4414ba319b5SDimitry Andric   if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
4424ba319b5SDimitry Andric     return true;
4434ba319b5SDimitry Andric 
4447a7e6055SDimitry Andric   const Instruction *I = dyn_cast<Instruction>(Ptr);
4457a7e6055SDimitry Andric   return I && I->getMetadata("amdgpu.uniform");
4467a7e6055SDimitry Andric }
4477a7e6055SDimitry Andric 
getSmrdOpcode(unsigned BaseOpcode,unsigned LoadSize)4487a7e6055SDimitry Andric static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
4497a7e6055SDimitry Andric 
4507a7e6055SDimitry Andric   if (LoadSize == 32)
4517a7e6055SDimitry Andric     return BaseOpcode;
4527a7e6055SDimitry Andric 
4537a7e6055SDimitry Andric   switch (BaseOpcode) {
4547a7e6055SDimitry Andric   case AMDGPU::S_LOAD_DWORD_IMM:
4557a7e6055SDimitry Andric     switch (LoadSize) {
4567a7e6055SDimitry Andric     case 64:
4577a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX2_IMM;
4587a7e6055SDimitry Andric     case 128:
4597a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX4_IMM;
4607a7e6055SDimitry Andric     case 256:
4617a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX8_IMM;
4627a7e6055SDimitry Andric     case 512:
4637a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX16_IMM;
4647a7e6055SDimitry Andric     }
4657a7e6055SDimitry Andric     break;
4667a7e6055SDimitry Andric   case AMDGPU::S_LOAD_DWORD_IMM_ci:
4677a7e6055SDimitry Andric     switch (LoadSize) {
4687a7e6055SDimitry Andric     case 64:
4697a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
4707a7e6055SDimitry Andric     case 128:
4717a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
4727a7e6055SDimitry Andric     case 256:
4737a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
4747a7e6055SDimitry Andric     case 512:
4757a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
4767a7e6055SDimitry Andric     }
4777a7e6055SDimitry Andric     break;
4787a7e6055SDimitry Andric   case AMDGPU::S_LOAD_DWORD_SGPR:
4797a7e6055SDimitry Andric     switch (LoadSize) {
4807a7e6055SDimitry Andric     case 64:
4817a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX2_SGPR;
4827a7e6055SDimitry Andric     case 128:
4837a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX4_SGPR;
4847a7e6055SDimitry Andric     case 256:
4857a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX8_SGPR;
4867a7e6055SDimitry Andric     case 512:
4877a7e6055SDimitry Andric       return AMDGPU::S_LOAD_DWORDX16_SGPR;
4887a7e6055SDimitry Andric     }
4897a7e6055SDimitry Andric     break;
4907a7e6055SDimitry Andric   }
4917a7e6055SDimitry Andric   llvm_unreachable("Invalid base smrd opcode or size");
4927a7e6055SDimitry Andric }
4937a7e6055SDimitry Andric 
hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const4947a7e6055SDimitry Andric bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
4957a7e6055SDimitry Andric   for (const GEPInfo &GEPInfo : AddrInfo) {
4967a7e6055SDimitry Andric     if (!GEPInfo.VgprParts.empty())
4977a7e6055SDimitry Andric       return true;
4987a7e6055SDimitry Andric   }
4997a7e6055SDimitry Andric   return false;
5007a7e6055SDimitry Andric }
5017a7e6055SDimitry Andric 
selectSMRD(MachineInstr & I,ArrayRef<GEPInfo> AddrInfo) const5027a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
5037a7e6055SDimitry Andric                                            ArrayRef<GEPInfo> AddrInfo) const {
5047a7e6055SDimitry Andric 
5057a7e6055SDimitry Andric   if (!I.hasOneMemOperand())
5067a7e6055SDimitry Andric     return false;
5077a7e6055SDimitry Andric 
508*b5893f02SDimitry Andric   if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
509*b5893f02SDimitry Andric       (*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
5107a7e6055SDimitry Andric     return false;
5117a7e6055SDimitry Andric 
5127a7e6055SDimitry Andric   if (!isInstrUniform(I))
5137a7e6055SDimitry Andric     return false;
5147a7e6055SDimitry Andric 
5157a7e6055SDimitry Andric   if (hasVgprParts(AddrInfo))
5167a7e6055SDimitry Andric     return false;
5177a7e6055SDimitry Andric 
5187a7e6055SDimitry Andric   MachineBasicBlock *BB = I.getParent();
5197a7e6055SDimitry Andric   MachineFunction *MF = BB->getParent();
5204ba319b5SDimitry Andric   const GCNSubtarget &Subtarget = MF->getSubtarget<GCNSubtarget>();
5217a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
5227a7e6055SDimitry Andric   unsigned DstReg = I.getOperand(0).getReg();
5237a7e6055SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
5247a7e6055SDimitry Andric   unsigned Opcode;
5257a7e6055SDimitry Andric   unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
5267a7e6055SDimitry Andric 
5277a7e6055SDimitry Andric   if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
5287a7e6055SDimitry Andric 
5297a7e6055SDimitry Andric     const GEPInfo &GEPInfo = AddrInfo[0];
5307a7e6055SDimitry Andric 
5317a7e6055SDimitry Andric     unsigned PtrReg = GEPInfo.SgprParts[0];
5327a7e6055SDimitry Andric     int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
5337a7e6055SDimitry Andric     if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
5347a7e6055SDimitry Andric       Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
5357a7e6055SDimitry Andric 
5367a7e6055SDimitry Andric       MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
5377a7e6055SDimitry Andric                                  .addReg(PtrReg)
5387a7e6055SDimitry Andric                                  .addImm(EncodedImm)
5397a7e6055SDimitry Andric                                  .addImm(0); // glc
5407a7e6055SDimitry Andric       return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
5417a7e6055SDimitry Andric     }
5427a7e6055SDimitry Andric 
5437a7e6055SDimitry Andric     if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
5447a7e6055SDimitry Andric         isUInt<32>(EncodedImm)) {
5457a7e6055SDimitry Andric       Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
5467a7e6055SDimitry Andric       MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
5477a7e6055SDimitry Andric                                    .addReg(PtrReg)
5487a7e6055SDimitry Andric                                    .addImm(EncodedImm)
5497a7e6055SDimitry Andric                                    .addImm(0); // glc
5507a7e6055SDimitry Andric       return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
5517a7e6055SDimitry Andric     }
5527a7e6055SDimitry Andric 
5537a7e6055SDimitry Andric     if (isUInt<32>(GEPInfo.Imm)) {
5547a7e6055SDimitry Andric       Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
5557a7e6055SDimitry Andric       unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5567a7e6055SDimitry Andric       BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
5577a7e6055SDimitry Andric               .addImm(GEPInfo.Imm);
5587a7e6055SDimitry Andric 
5597a7e6055SDimitry Andric       MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
5607a7e6055SDimitry Andric                                    .addReg(PtrReg)
5617a7e6055SDimitry Andric                                    .addReg(OffsetReg)
5627a7e6055SDimitry Andric                                    .addImm(0); // glc
5637a7e6055SDimitry Andric       return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
5647a7e6055SDimitry Andric     }
5657a7e6055SDimitry Andric   }
5667a7e6055SDimitry Andric 
5677a7e6055SDimitry Andric   unsigned PtrReg = I.getOperand(1).getReg();
5687a7e6055SDimitry Andric   Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
5697a7e6055SDimitry Andric   MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
5707a7e6055SDimitry Andric                                .addReg(PtrReg)
5717a7e6055SDimitry Andric                                .addImm(0)
5727a7e6055SDimitry Andric                                .addImm(0); // glc
5737a7e6055SDimitry Andric   return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
5747a7e6055SDimitry Andric }
5757a7e6055SDimitry Andric 
5767a7e6055SDimitry Andric 
selectG_LOAD(MachineInstr & I) const5777a7e6055SDimitry Andric bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
5787a7e6055SDimitry Andric   MachineBasicBlock *BB = I.getParent();
5797a7e6055SDimitry Andric   MachineFunction *MF = BB->getParent();
5807a7e6055SDimitry Andric   MachineRegisterInfo &MRI = MF->getRegInfo();
5817a7e6055SDimitry Andric   DebugLoc DL = I.getDebugLoc();
5827a7e6055SDimitry Andric   unsigned DstReg = I.getOperand(0).getReg();
5837a7e6055SDimitry Andric   unsigned PtrReg = I.getOperand(1).getReg();
5847a7e6055SDimitry Andric   unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
5857a7e6055SDimitry Andric   unsigned Opcode;
5867a7e6055SDimitry Andric 
5877a7e6055SDimitry Andric   SmallVector<GEPInfo, 4> AddrInfo;
5887a7e6055SDimitry Andric 
5897a7e6055SDimitry Andric   getAddrModeInfo(I, MRI, AddrInfo);
5907a7e6055SDimitry Andric 
5917a7e6055SDimitry Andric   if (selectSMRD(I, AddrInfo)) {
5927a7e6055SDimitry Andric     I.eraseFromParent();
5937a7e6055SDimitry Andric     return true;
5947a7e6055SDimitry Andric   }
5957a7e6055SDimitry Andric 
5967a7e6055SDimitry Andric   switch (LoadSize) {
5977a7e6055SDimitry Andric   default:
5987a7e6055SDimitry Andric     llvm_unreachable("Load size not supported\n");
5997a7e6055SDimitry Andric   case 32:
6007a7e6055SDimitry Andric     Opcode = AMDGPU::FLAT_LOAD_DWORD;
6017a7e6055SDimitry Andric     break;
6027a7e6055SDimitry Andric   case 64:
6037a7e6055SDimitry Andric     Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
6047a7e6055SDimitry Andric     break;
6057a7e6055SDimitry Andric   }
6067a7e6055SDimitry Andric 
6077a7e6055SDimitry Andric   MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
6087a7e6055SDimitry Andric                                .add(I.getOperand(0))
6097a7e6055SDimitry Andric                                .addReg(PtrReg)
61024d58133SDimitry Andric                                .addImm(0)  // offset
61124d58133SDimitry Andric                                .addImm(0)  // glc
61224d58133SDimitry Andric                                .addImm(0); // slc
6137a7e6055SDimitry Andric 
6147a7e6055SDimitry Andric   bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
6157a7e6055SDimitry Andric   I.eraseFromParent();
6167a7e6055SDimitry Andric   return Ret;
6177a7e6055SDimitry Andric }
6187a7e6055SDimitry Andric 
select(MachineInstr & I,CodeGenCoverage & CoverageInfo) const6192cab237bSDimitry Andric bool AMDGPUInstructionSelector::select(MachineInstr &I,
6202cab237bSDimitry Andric                                        CodeGenCoverage &CoverageInfo) const {
6217a7e6055SDimitry Andric 
6224ba319b5SDimitry Andric   if (!isPreISelGenericOpcode(I.getOpcode())) {
6234ba319b5SDimitry Andric     if (I.isCopy())
6244ba319b5SDimitry Andric       return selectCOPY(I);
6257a7e6055SDimitry Andric     return true;
6264ba319b5SDimitry Andric   }
6277a7e6055SDimitry Andric 
6287a7e6055SDimitry Andric   switch (I.getOpcode()) {
6297a7e6055SDimitry Andric   default:
6304ba319b5SDimitry Andric     return selectImpl(I, CoverageInfo);
6317a7e6055SDimitry Andric   case TargetOpcode::G_ADD:
6327a7e6055SDimitry Andric     return selectG_ADD(I);
633*b5893f02SDimitry Andric   case TargetOpcode::G_INTTOPTR:
6344ba319b5SDimitry Andric   case TargetOpcode::G_BITCAST:
6354ba319b5SDimitry Andric     return selectCOPY(I);
6367a7e6055SDimitry Andric   case TargetOpcode::G_CONSTANT:
6374ba319b5SDimitry Andric   case TargetOpcode::G_FCONSTANT:
6387a7e6055SDimitry Andric     return selectG_CONSTANT(I);
6397a7e6055SDimitry Andric   case TargetOpcode::G_GEP:
6407a7e6055SDimitry Andric     return selectG_GEP(I);
6414ba319b5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF:
6424ba319b5SDimitry Andric     return selectG_IMPLICIT_DEF(I);
6434ba319b5SDimitry Andric   case TargetOpcode::G_INTRINSIC:
6444ba319b5SDimitry Andric     return selectG_INTRINSIC(I, CoverageInfo);
6454ba319b5SDimitry Andric   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
6464ba319b5SDimitry Andric     return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
6477a7e6055SDimitry Andric   case TargetOpcode::G_LOAD:
6487a7e6055SDimitry Andric     return selectG_LOAD(I);
6497a7e6055SDimitry Andric   case TargetOpcode::G_STORE:
6507a7e6055SDimitry Andric     return selectG_STORE(I);
6517a7e6055SDimitry Andric   }
6527a7e6055SDimitry Andric   return false;
6537a7e6055SDimitry Andric }
6544ba319b5SDimitry Andric 
6554ba319b5SDimitry Andric InstructionSelector::ComplexRendererFns
selectVCSRC(MachineOperand & Root) const6564ba319b5SDimitry Andric AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
6574ba319b5SDimitry Andric   return {{
6584ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
6594ba319b5SDimitry Andric   }};
6604ba319b5SDimitry Andric 
6614ba319b5SDimitry Andric }
6624ba319b5SDimitry Andric 
6634ba319b5SDimitry Andric ///
6644ba319b5SDimitry Andric /// This will select either an SGPR or VGPR operand and will save us from
6654ba319b5SDimitry Andric /// having to write an extra tablegen pattern.
6664ba319b5SDimitry Andric InstructionSelector::ComplexRendererFns
selectVSRC0(MachineOperand & Root) const6674ba319b5SDimitry Andric AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
6684ba319b5SDimitry Andric   return {{
6694ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
6704ba319b5SDimitry Andric   }};
6714ba319b5SDimitry Andric }
6724ba319b5SDimitry Andric 
6734ba319b5SDimitry Andric InstructionSelector::ComplexRendererFns
selectVOP3Mods0(MachineOperand & Root) const6744ba319b5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
6754ba319b5SDimitry Andric   return {{
6764ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6774ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src0_mods
6784ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
6794ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
6804ba319b5SDimitry Andric   }};
6814ba319b5SDimitry Andric }
6824ba319b5SDimitry Andric InstructionSelector::ComplexRendererFns
selectVOP3OMods(MachineOperand & Root) const6834ba319b5SDimitry Andric AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
6844ba319b5SDimitry Andric   return {{
6854ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6864ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
6874ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
6884ba319b5SDimitry Andric   }};
6894ba319b5SDimitry Andric }
6904ba319b5SDimitry Andric 
6914ba319b5SDimitry Andric InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand & Root) const6924ba319b5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
6934ba319b5SDimitry Andric   return {{
6944ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6954ba319b5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // src_mods
6964ba319b5SDimitry Andric   }};
6974ba319b5SDimitry Andric }
698