17836f895SMatt Arsenault //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
245bb48eaSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
645bb48eaSTom Stellard //
745bb48eaSTom Stellard //==-----------------------------------------------------------------------===//
845bb48eaSTom Stellard //
945bb48eaSTom Stellard /// \file
105f8f34e4SAdrian Prantl /// Defines an instruction selector for the AMDGPU target.
1145bb48eaSTom Stellard //
1245bb48eaSTom Stellard //===----------------------------------------------------------------------===//
13592d0681SMatt Arsenault 
1447d6274dSDaniil Fukalov #include "AMDGPUISelDAGToDAG.h"
152bc2f33bSEugene Zelenko #include "AMDGPU.h"
1620d20156SJoe Nash #include "AMDGPUSubtarget.h"
17cc85223fSMatt Arsenault #include "AMDGPUTargetMachine.h"
1820d20156SJoe Nash #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1948958d02SDaniil Fukalov #include "MCTargetDesc/R600MCTargetDesc.h"
2047d6274dSDaniil Fukalov #include "R600RegisterInfo.h"
2145bb48eaSTom Stellard #include "SIMachineFunctionInfo.h"
2235617ed4SNicolai Haehnle #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
23f97de007SJan Vesely #include "llvm/Analysis/ValueTracking.h"
2445bb48eaSTom Stellard #include "llvm/CodeGen/FunctionLoweringInfo.h"
2545bb48eaSTom Stellard #include "llvm/CodeGen/SelectionDAG.h"
2645bb48eaSTom Stellard #include "llvm/CodeGen/SelectionDAGISel.h"
272bc2f33bSEugene Zelenko #include "llvm/CodeGen/SelectionDAGNodes.h"
286a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
2905da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
306a87e9b0Sdfukalov 
312ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
326a87e9b0Sdfukalov #include "llvm/Analysis/LoopInfo.h"
332ce560f0SAlexander Timofeev #include "llvm/IR/Dominators.h"
342ce560f0SAlexander Timofeev #endif
3545bb48eaSTom Stellard 
36*c17450a0SFangrui Song #define DEBUG_TYPE "amdgpu-isel"
37e8c03a25SMatt Arsenault 
3845bb48eaSTom Stellard using namespace llvm;
3945bb48eaSTom Stellard 
4045bb48eaSTom Stellard //===----------------------------------------------------------------------===//
4145bb48eaSTom Stellard // Instruction Selector Implementation
4245bb48eaSTom Stellard //===----------------------------------------------------------------------===//
4345bb48eaSTom Stellard 
4445bb48eaSTom Stellard namespace {
45bc4497b1STom Stellard 
stripBitcast(SDValue Val)46e8c03a25SMatt Arsenault static SDValue stripBitcast(SDValue Val) {
47e8c03a25SMatt Arsenault   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
48e8c03a25SMatt Arsenault }
49e8c03a25SMatt Arsenault 
50e8c03a25SMatt Arsenault // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)51e8c03a25SMatt Arsenault static bool isExtractHiElt(SDValue In, SDValue &Out) {
52e8c03a25SMatt Arsenault   In = stripBitcast(In);
53a8d9d507SStanislav Mekhanoshin 
54a8d9d507SStanislav Mekhanoshin   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
55a8d9d507SStanislav Mekhanoshin     if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
56a8d9d507SStanislav Mekhanoshin       if (!Idx->isOne())
57a8d9d507SStanislav Mekhanoshin         return false;
58a8d9d507SStanislav Mekhanoshin       Out = In.getOperand(0);
59a8d9d507SStanislav Mekhanoshin       return true;
60a8d9d507SStanislav Mekhanoshin     }
61a8d9d507SStanislav Mekhanoshin   }
62a8d9d507SStanislav Mekhanoshin 
63e8c03a25SMatt Arsenault   if (In.getOpcode() != ISD::TRUNCATE)
64e8c03a25SMatt Arsenault     return false;
65e8c03a25SMatt Arsenault 
66e8c03a25SMatt Arsenault   SDValue Srl = In.getOperand(0);
67e8c03a25SMatt Arsenault   if (Srl.getOpcode() == ISD::SRL) {
68e8c03a25SMatt Arsenault     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
69e8c03a25SMatt Arsenault       if (ShiftAmt->getZExtValue() == 16) {
70e8c03a25SMatt Arsenault         Out = stripBitcast(Srl.getOperand(0));
71e8c03a25SMatt Arsenault         return true;
72e8c03a25SMatt Arsenault       }
73e8c03a25SMatt Arsenault     }
74e8c03a25SMatt Arsenault   }
75e8c03a25SMatt Arsenault 
76e8c03a25SMatt Arsenault   return false;
77e8c03a25SMatt Arsenault }
78e8c03a25SMatt Arsenault 
79e8c03a25SMatt Arsenault // Look through operations that obscure just looking at the low 16-bits of the
80e8c03a25SMatt Arsenault // same register.
stripExtractLoElt(SDValue In)81e8c03a25SMatt Arsenault static SDValue stripExtractLoElt(SDValue In) {
82a8d9d507SStanislav Mekhanoshin   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
83a8d9d507SStanislav Mekhanoshin     if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
849af8f1b1SCraig Topper       if (Idx->isZero() && In.getValueSizeInBits() <= 32)
85a8d9d507SStanislav Mekhanoshin         return In.getOperand(0);
86a8d9d507SStanislav Mekhanoshin     }
87a8d9d507SStanislav Mekhanoshin   }
88a8d9d507SStanislav Mekhanoshin 
89e8c03a25SMatt Arsenault   if (In.getOpcode() == ISD::TRUNCATE) {
90e8c03a25SMatt Arsenault     SDValue Src = In.getOperand(0);
91e8c03a25SMatt Arsenault     if (Src.getValueType().getSizeInBits() == 32)
92e8c03a25SMatt Arsenault       return stripBitcast(Src);
93e8c03a25SMatt Arsenault   }
94e8c03a25SMatt Arsenault 
95e8c03a25SMatt Arsenault   return In;
96e8c03a25SMatt Arsenault }
97e8c03a25SMatt Arsenault 
9845bb48eaSTom Stellard }  // end anonymous namespace
9945bb48eaSTom Stellard 
1003d76d360SFangrui Song INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
1017016f134SMatt Arsenault                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)1027016f134SMatt Arsenault INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
1031c538423SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10435617ed4SNicolai Haehnle INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
1052ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
1062ce560f0SAlexander Timofeev INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1072ce560f0SAlexander Timofeev INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1082ce560f0SAlexander Timofeev #endif
1093d76d360SFangrui Song INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
1107016f134SMatt Arsenault                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
1117016f134SMatt Arsenault 
1125f8f34e4SAdrian Prantl /// This pass converts a legalized DAG into a AMDGPU-specific
11345bb48eaSTom Stellard // DAG, ready for instruction scheduling.
1147016f134SMatt Arsenault FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
11560a83737SKonstantin Zhuravlyov                                         CodeGenOpt::Level OptLevel) {
11660a83737SKonstantin Zhuravlyov   return new AMDGPUDAGToDAGISel(TM, OptLevel);
11745bb48eaSTom Stellard }
11845bb48eaSTom Stellard 
AMDGPUDAGToDAGISel(TargetMachine * TM,CodeGenOpt::Level OptLevel)11947d6274dSDaniil Fukalov AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
12047d6274dSDaniil Fukalov     TargetMachine *TM /*= nullptr*/,
12147d6274dSDaniil Fukalov     CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
12247d6274dSDaniil Fukalov     : SelectionDAGISel(*TM, OptLevel) {
12347d6274dSDaniil Fukalov   EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
12420287697STom Stellard }
12520287697STom Stellard 
runOnMachineFunction(MachineFunction & MF)12645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
1272ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
1282ce560f0SAlexander Timofeev   DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1292ce560f0SAlexander Timofeev   LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1302ce560f0SAlexander Timofeev   for (auto &L : LI->getLoopsInPreorder()) {
1312ce560f0SAlexander Timofeev     assert(L->isLCSSAForm(DT));
1322ce560f0SAlexander Timofeev   }
1332ce560f0SAlexander Timofeev #endif
1345bfbae5cSTom Stellard   Subtarget = &MF.getSubtarget<GCNSubtarget>();
1355660bb6bSMatt Arsenault   Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
13645bb48eaSTom Stellard   return SelectionDAGISel::runOnMachineFunction(MF);
13745bb48eaSTom Stellard }
13845bb48eaSTom Stellard 
fp16SrcZerosHighBits(unsigned Opc) const1399ad8a1f6SMatt Arsenault bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
1409ad8a1f6SMatt Arsenault   // XXX - only need to list legal operations.
1419ad8a1f6SMatt Arsenault   switch (Opc) {
1429ad8a1f6SMatt Arsenault   case ISD::FADD:
1439ad8a1f6SMatt Arsenault   case ISD::FSUB:
1449ad8a1f6SMatt Arsenault   case ISD::FMUL:
1459ad8a1f6SMatt Arsenault   case ISD::FDIV:
1469ad8a1f6SMatt Arsenault   case ISD::FREM:
1479ad8a1f6SMatt Arsenault   case ISD::FCANONICALIZE:
1489ad8a1f6SMatt Arsenault   case ISD::UINT_TO_FP:
1499ad8a1f6SMatt Arsenault   case ISD::SINT_TO_FP:
1509ad8a1f6SMatt Arsenault   case ISD::FABS:
1519ad8a1f6SMatt Arsenault     // Fabs is lowered to a bit operation, but it's an and which will clear the
1529ad8a1f6SMatt Arsenault     // high bits anyway.
1539ad8a1f6SMatt Arsenault   case ISD::FSQRT:
1549ad8a1f6SMatt Arsenault   case ISD::FSIN:
1559ad8a1f6SMatt Arsenault   case ISD::FCOS:
1569ad8a1f6SMatt Arsenault   case ISD::FPOWI:
1579ad8a1f6SMatt Arsenault   case ISD::FPOW:
1589ad8a1f6SMatt Arsenault   case ISD::FLOG:
1599ad8a1f6SMatt Arsenault   case ISD::FLOG2:
1609ad8a1f6SMatt Arsenault   case ISD::FLOG10:
1619ad8a1f6SMatt Arsenault   case ISD::FEXP:
1629ad8a1f6SMatt Arsenault   case ISD::FEXP2:
1639ad8a1f6SMatt Arsenault   case ISD::FCEIL:
1649ad8a1f6SMatt Arsenault   case ISD::FTRUNC:
1659ad8a1f6SMatt Arsenault   case ISD::FRINT:
1669ad8a1f6SMatt Arsenault   case ISD::FNEARBYINT:
1679ad8a1f6SMatt Arsenault   case ISD::FROUND:
1689ad8a1f6SMatt Arsenault   case ISD::FFLOOR:
1699ad8a1f6SMatt Arsenault   case ISD::FMINNUM:
1709ad8a1f6SMatt Arsenault   case ISD::FMAXNUM:
1719ad8a1f6SMatt Arsenault   case AMDGPUISD::FRACT:
1729ad8a1f6SMatt Arsenault   case AMDGPUISD::CLAMP:
1739ad8a1f6SMatt Arsenault   case AMDGPUISD::COS_HW:
1749ad8a1f6SMatt Arsenault   case AMDGPUISD::SIN_HW:
1759ad8a1f6SMatt Arsenault   case AMDGPUISD::FMIN3:
1769ad8a1f6SMatt Arsenault   case AMDGPUISD::FMAX3:
1779ad8a1f6SMatt Arsenault   case AMDGPUISD::FMED3:
1789ad8a1f6SMatt Arsenault   case AMDGPUISD::FMAD_FTZ:
1799ad8a1f6SMatt Arsenault   case AMDGPUISD::RCP:
1809ad8a1f6SMatt Arsenault   case AMDGPUISD::RSQ:
1819ad8a1f6SMatt Arsenault   case AMDGPUISD::RCP_IFLAG:
1829ad8a1f6SMatt Arsenault   case AMDGPUISD::LDEXP:
1839ad8a1f6SMatt Arsenault     // On gfx10, all 16-bit instructions preserve the high bits.
1849ad8a1f6SMatt Arsenault     return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
1859ad8a1f6SMatt Arsenault   case ISD::FP_ROUND:
1869ad8a1f6SMatt Arsenault     // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
1879ad8a1f6SMatt Arsenault     // high bits on gfx9.
1889ad8a1f6SMatt Arsenault     // TODO: If we had the source node we could see if the source was fma/mad
1899ad8a1f6SMatt Arsenault     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
1909ad8a1f6SMatt Arsenault   case ISD::FMA:
1919ad8a1f6SMatt Arsenault   case ISD::FMAD:
1929ad8a1f6SMatt Arsenault   case AMDGPUISD::DIV_FIXUP:
1939ad8a1f6SMatt Arsenault     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
1949ad8a1f6SMatt Arsenault   default:
1959ad8a1f6SMatt Arsenault     // fcopysign, select and others may be lowered to 32-bit bit operations
1969ad8a1f6SMatt Arsenault     // which don't zero the high bits.
1979ad8a1f6SMatt Arsenault     return false;
1989ad8a1f6SMatt Arsenault   }
1999ad8a1f6SMatt Arsenault }
2009ad8a1f6SMatt Arsenault 
getAnalysisUsage(AnalysisUsage & AU) const20147d6274dSDaniil Fukalov void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
20247d6274dSDaniil Fukalov   AU.addRequired<AMDGPUArgumentUsageInfo>();
20347d6274dSDaniil Fukalov   AU.addRequired<LegacyDivergenceAnalysis>();
20447d6274dSDaniil Fukalov #ifdef EXPENSIVE_CHECKS
20547d6274dSDaniil Fukalov   AU.addRequired<DominatorTreeWrapperPass>();
20647d6274dSDaniil Fukalov   AU.addRequired<LoopInfoWrapperPass>();
20747d6274dSDaniil Fukalov #endif
20847d6274dSDaniil Fukalov   SelectionDAGISel::getAnalysisUsage(AU);
20947d6274dSDaniil Fukalov }
21047d6274dSDaniil Fukalov 
matchLoadD16FromBuildVector(SDNode * N) const211e8c03a25SMatt Arsenault bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
212e8c03a25SMatt Arsenault   assert(Subtarget->d16PreservesUnusedBits());
213e8c03a25SMatt Arsenault   MVT VT = N->getValueType(0).getSimpleVT();
214e8c03a25SMatt Arsenault   if (VT != MVT::v2i16 && VT != MVT::v2f16)
215e8c03a25SMatt Arsenault     return false;
216e8c03a25SMatt Arsenault 
217e8c03a25SMatt Arsenault   SDValue Lo = N->getOperand(0);
218e8c03a25SMatt Arsenault   SDValue Hi = N->getOperand(1);
219e8c03a25SMatt Arsenault 
220e8c03a25SMatt Arsenault   LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
221e8c03a25SMatt Arsenault 
222e8c03a25SMatt Arsenault   // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
223e8c03a25SMatt Arsenault   // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
224e8c03a25SMatt Arsenault   // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
225e8c03a25SMatt Arsenault 
226e8c03a25SMatt Arsenault   // Need to check for possible indirect dependencies on the other half of the
227e8c03a25SMatt Arsenault   // vector to avoid introducing a cycle.
228e8c03a25SMatt Arsenault   if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
229e8c03a25SMatt Arsenault     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
230e8c03a25SMatt Arsenault 
231e8c03a25SMatt Arsenault     SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
232e8c03a25SMatt Arsenault     SDValue Ops[] = {
233e8c03a25SMatt Arsenault       LdHi->getChain(), LdHi->getBasePtr(), TiedIn
234e8c03a25SMatt Arsenault     };
235e8c03a25SMatt Arsenault 
236e8c03a25SMatt Arsenault     unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
237e8c03a25SMatt Arsenault     if (LdHi->getMemoryVT() == MVT::i8) {
238e8c03a25SMatt Arsenault       LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
239e8c03a25SMatt Arsenault         AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
240e8c03a25SMatt Arsenault     } else {
241e8c03a25SMatt Arsenault       assert(LdHi->getMemoryVT() == MVT::i16);
242e8c03a25SMatt Arsenault     }
243e8c03a25SMatt Arsenault 
244e8c03a25SMatt Arsenault     SDValue NewLoadHi =
245e8c03a25SMatt Arsenault       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
246e8c03a25SMatt Arsenault                                   Ops, LdHi->getMemoryVT(),
247e8c03a25SMatt Arsenault                                   LdHi->getMemOperand());
248e8c03a25SMatt Arsenault 
249e8c03a25SMatt Arsenault     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
250e8c03a25SMatt Arsenault     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
251e8c03a25SMatt Arsenault     return true;
252e8c03a25SMatt Arsenault   }
253e8c03a25SMatt Arsenault 
254e8c03a25SMatt Arsenault   // build_vector (load ptr), hi -> load_d16_lo ptr, hi
255e8c03a25SMatt Arsenault   // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
256e8c03a25SMatt Arsenault   // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
257e8c03a25SMatt Arsenault   LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
258e8c03a25SMatt Arsenault   if (LdLo && Lo.hasOneUse()) {
259e8c03a25SMatt Arsenault     SDValue TiedIn = getHi16Elt(Hi);
260e8c03a25SMatt Arsenault     if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
261e8c03a25SMatt Arsenault       return false;
262e8c03a25SMatt Arsenault 
263e8c03a25SMatt Arsenault     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
264e8c03a25SMatt Arsenault     unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
265e8c03a25SMatt Arsenault     if (LdLo->getMemoryVT() == MVT::i8) {
266e8c03a25SMatt Arsenault       LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
267e8c03a25SMatt Arsenault         AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
268e8c03a25SMatt Arsenault     } else {
269e8c03a25SMatt Arsenault       assert(LdLo->getMemoryVT() == MVT::i16);
270e8c03a25SMatt Arsenault     }
271e8c03a25SMatt Arsenault 
272e8c03a25SMatt Arsenault     TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
273e8c03a25SMatt Arsenault 
274e8c03a25SMatt Arsenault     SDValue Ops[] = {
275e8c03a25SMatt Arsenault       LdLo->getChain(), LdLo->getBasePtr(), TiedIn
276e8c03a25SMatt Arsenault     };
277e8c03a25SMatt Arsenault 
278e8c03a25SMatt Arsenault     SDValue NewLoadLo =
279e8c03a25SMatt Arsenault       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
280e8c03a25SMatt Arsenault                                   Ops, LdLo->getMemoryVT(),
281e8c03a25SMatt Arsenault                                   LdLo->getMemOperand());
282e8c03a25SMatt Arsenault 
283e8c03a25SMatt Arsenault     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
284e8c03a25SMatt Arsenault     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
285e8c03a25SMatt Arsenault     return true;
286e8c03a25SMatt Arsenault   }
287e8c03a25SMatt Arsenault 
288e8c03a25SMatt Arsenault   return false;
289e8c03a25SMatt Arsenault }
290e8c03a25SMatt Arsenault 
PreprocessISelDAG()291e8c03a25SMatt Arsenault void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
292e8c03a25SMatt Arsenault   if (!Subtarget->d16PreservesUnusedBits())
293e8c03a25SMatt Arsenault     return;
294e8c03a25SMatt Arsenault 
295e8c03a25SMatt Arsenault   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
296e8c03a25SMatt Arsenault 
297e8c03a25SMatt Arsenault   bool MadeChange = false;
298e8c03a25SMatt Arsenault   while (Position != CurDAG->allnodes_begin()) {
299e8c03a25SMatt Arsenault     SDNode *N = &*--Position;
300e8c03a25SMatt Arsenault     if (N->use_empty())
301e8c03a25SMatt Arsenault       continue;
302e8c03a25SMatt Arsenault 
303e8c03a25SMatt Arsenault     switch (N->getOpcode()) {
304e8c03a25SMatt Arsenault     case ISD::BUILD_VECTOR:
305e8c03a25SMatt Arsenault       MadeChange |= matchLoadD16FromBuildVector(N);
306e8c03a25SMatt Arsenault       break;
307e8c03a25SMatt Arsenault     default:
308e8c03a25SMatt Arsenault       break;
309e8c03a25SMatt Arsenault     }
310e8c03a25SMatt Arsenault   }
311e8c03a25SMatt Arsenault 
312e8c03a25SMatt Arsenault   if (MadeChange) {
313e8c03a25SMatt Arsenault     CurDAG->RemoveDeadNodes();
314e8c03a25SMatt Arsenault     LLVM_DEBUG(dbgs() << "After PreProcess:\n";
315e8c03a25SMatt Arsenault                CurDAG->dump(););
316e8c03a25SMatt Arsenault   }
317e8c03a25SMatt Arsenault }
318e8c03a25SMatt Arsenault 
isNoNanSrc(SDValue N) const319f84e5d9aSMatt Arsenault bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
320f84e5d9aSMatt Arsenault   if (TM.Options.NoNaNsFPMath)
321f84e5d9aSMatt Arsenault     return true;
322f84e5d9aSMatt Arsenault 
323f84e5d9aSMatt Arsenault   // TODO: Move into isKnownNeverNaN
324714ceefaSJonas Paulsson   if (N->getFlags().hasNoNaNs())
325714ceefaSJonas Paulsson     return true;
326f84e5d9aSMatt Arsenault 
327f84e5d9aSMatt Arsenault   return CurDAG->isKnownNeverNaN(N);
328f84e5d9aSMatt Arsenault }
329f84e5d9aSMatt Arsenault 
isInlineImmediate(const SDNode * N,bool Negated) const330e24b34e9SMatt Arsenault bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
331e24b34e9SMatt Arsenault                                            bool Negated) const {
332b7f87c0eSMatt Arsenault   if (N->isUndef())
333b7f87c0eSMatt Arsenault     return true;
334fe267759SMatt Arsenault 
335e24b34e9SMatt Arsenault   const SIInstrInfo *TII = Subtarget->getInstrInfo();
336e24b34e9SMatt Arsenault   if (Negated) {
337e24b34e9SMatt Arsenault     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
338e24b34e9SMatt Arsenault       return TII->isInlineConstant(-C->getAPIntValue());
339e24b34e9SMatt Arsenault 
340e24b34e9SMatt Arsenault     if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
341e24b34e9SMatt Arsenault       return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
342e24b34e9SMatt Arsenault 
343e24b34e9SMatt Arsenault   } else {
344fe267759SMatt Arsenault     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
345fe267759SMatt Arsenault       return TII->isInlineConstant(C->getAPIntValue());
346fe267759SMatt Arsenault 
347fe267759SMatt Arsenault     if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
348fe267759SMatt Arsenault       return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
349e24b34e9SMatt Arsenault   }
350fe267759SMatt Arsenault 
351fe267759SMatt Arsenault   return false;
35245bb48eaSTom Stellard }
35345bb48eaSTom Stellard 
3545f8f34e4SAdrian Prantl /// Determine the register class for \p OpNo
35545bb48eaSTom Stellard /// \returns The register class of the virtual register that will be used for
35645bb48eaSTom Stellard /// the given operand number \OpNo or NULL if the register class cannot be
35745bb48eaSTom Stellard /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const35845bb48eaSTom Stellard const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
35945bb48eaSTom Stellard                                                           unsigned OpNo) const {
360c507cdb4SMatt Arsenault   if (!N->isMachineOpcode()) {
361c507cdb4SMatt Arsenault     if (N->getOpcode() == ISD::CopyToReg) {
36234978602SJay Foad       Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
36334978602SJay Foad       if (Reg.isVirtual()) {
364c507cdb4SMatt Arsenault         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
365c507cdb4SMatt Arsenault         return MRI.getRegClass(Reg);
366c507cdb4SMatt Arsenault       }
367c507cdb4SMatt Arsenault 
368c507cdb4SMatt Arsenault       const SIRegisterInfo *TRI
3695bfbae5cSTom Stellard         = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
370c507cdb4SMatt Arsenault       return TRI->getPhysRegClass(Reg);
371c507cdb4SMatt Arsenault     }
372c507cdb4SMatt Arsenault 
37345bb48eaSTom Stellard     return nullptr;
374c507cdb4SMatt Arsenault   }
37545bb48eaSTom Stellard 
37645bb48eaSTom Stellard   switch (N->getMachineOpcode()) {
37745bb48eaSTom Stellard   default: {
37845bb48eaSTom Stellard     const MCInstrDesc &Desc =
37945bb48eaSTom Stellard         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
38045bb48eaSTom Stellard     unsigned OpIdx = Desc.getNumDefs() + OpNo;
38145bb48eaSTom Stellard     if (OpIdx >= Desc.getNumOperands())
38245bb48eaSTom Stellard       return nullptr;
38345bb48eaSTom Stellard     int RegClass = Desc.OpInfo[OpIdx].RegClass;
38445bb48eaSTom Stellard     if (RegClass == -1)
38545bb48eaSTom Stellard       return nullptr;
38645bb48eaSTom Stellard 
38745bb48eaSTom Stellard     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
38845bb48eaSTom Stellard   }
38945bb48eaSTom Stellard   case AMDGPU::REG_SEQUENCE: {
39045bb48eaSTom Stellard     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
39145bb48eaSTom Stellard     const TargetRegisterClass *SuperRC =
39245bb48eaSTom Stellard         Subtarget->getRegisterInfo()->getRegClass(RCID);
39345bb48eaSTom Stellard 
39445bb48eaSTom Stellard     SDValue SubRegOp = N->getOperand(OpNo + 1);
39545bb48eaSTom Stellard     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
39645bb48eaSTom Stellard     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
39745bb48eaSTom Stellard                                                               SubRegIdx);
39845bb48eaSTom Stellard   }
39945bb48eaSTom Stellard   }
40045bb48eaSTom Stellard }
40145bb48eaSTom Stellard 
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const402b5234b64SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
403b5234b64SMatt Arsenault                                          SDValue Glue) const {
404b5234b64SMatt Arsenault   SmallVector <SDValue, 8> Ops;
405b5234b64SMatt Arsenault   Ops.push_back(NewChain); // Replace the chain.
406b5234b64SMatt Arsenault   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
407b5234b64SMatt Arsenault     Ops.push_back(N->getOperand(i));
408b5234b64SMatt Arsenault 
409b5234b64SMatt Arsenault   Ops.push_back(Glue);
410b5234b64SMatt Arsenault   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
411b5234b64SMatt Arsenault }
412b5234b64SMatt Arsenault 
glueCopyToM0(SDNode * N,SDValue Val) const413cdd191d9SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
41445bb48eaSTom Stellard   const SITargetLowering& Lowering =
41545bb48eaSTom Stellard     *static_cast<const SITargetLowering*>(getTargetLowering());
41645bb48eaSTom Stellard 
4175a86dbcfSMatt Arsenault   assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
4185a86dbcfSMatt Arsenault 
419b5234b64SMatt Arsenault   SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
420b5234b64SMatt Arsenault   return glueCopyToOp(N, M0, M0.getValue(1));
42145bb48eaSTom Stellard }
42245bb48eaSTom Stellard 
glueCopyToM0LDSInit(SDNode * N) const423cdd191d9SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
4244dc3b2bfSNicolai Haehnle   unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
4254dc3b2bfSNicolai Haehnle   if (AS == AMDGPUAS::LOCAL_ADDRESS) {
4264dc3b2bfSNicolai Haehnle     if (Subtarget->ldsRequiresM0Init())
427cdd191d9SMatt Arsenault       return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
4284dc3b2bfSNicolai Haehnle   } else if (AS == AMDGPUAS::REGION_ADDRESS) {
4294dc3b2bfSNicolai Haehnle     MachineFunction &MF = CurDAG->getMachineFunction();
4304dc3b2bfSNicolai Haehnle     unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
4314dc3b2bfSNicolai Haehnle     return
4324dc3b2bfSNicolai Haehnle         glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
4334dc3b2bfSNicolai Haehnle   }
4344dc3b2bfSNicolai Haehnle   return N;
435cdd191d9SMatt Arsenault }
436cdd191d9SMatt Arsenault 
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const437f1c7b92aSTim Renouf MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
438f1c7b92aSTim Renouf                                                   EVT VT) const {
439f1c7b92aSTim Renouf   SDNode *Lo = CurDAG->getMachineNode(
440f1c7b92aSTim Renouf       AMDGPU::S_MOV_B32, DL, MVT::i32,
44106eed422SMatt Arsenault       CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
442f1c7b92aSTim Renouf   SDNode *Hi =
443f1c7b92aSTim Renouf       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
44406eed422SMatt Arsenault                              CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
445f1c7b92aSTim Renouf   const SDValue Ops[] = {
446f1c7b92aSTim Renouf       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
447f1c7b92aSTim Renouf       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
448f1c7b92aSTim Renouf       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
449f1c7b92aSTim Renouf 
450f1c7b92aSTim Renouf   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
451f1c7b92aSTim Renouf }
452f1c7b92aSTim Renouf 
SelectBuildVector(SDNode * N,unsigned RegClassID)45320287697STom Stellard void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
45445bb48eaSTom Stellard   EVT VT = N->getValueType(0);
45545bb48eaSTom Stellard   unsigned NumVectorElts = VT.getVectorNumElements();
45645bb48eaSTom Stellard   EVT EltVT = VT.getVectorElementType();
45745bb48eaSTom Stellard   SDLoc DL(N);
45845bb48eaSTom Stellard   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
45945bb48eaSTom Stellard 
46045bb48eaSTom Stellard   if (NumVectorElts == 1) {
46195927c0fSJustin Bogner     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
46295927c0fSJustin Bogner                          RegClass);
46395927c0fSJustin Bogner     return;
46445bb48eaSTom Stellard   }
46545bb48eaSTom Stellard 
466e67cc380SStanislav Mekhanoshin   assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
46745bb48eaSTom Stellard                                   "supported yet");
468e67cc380SStanislav Mekhanoshin   // 32 = Max Num Vector Elements
46945bb48eaSTom Stellard   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
47045bb48eaSTom Stellard   // 1 = Vector Register Class
471e67cc380SStanislav Mekhanoshin   SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
47245bb48eaSTom Stellard 
473ed3527c6SStanislav Mekhanoshin   bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
474ed3527c6SStanislav Mekhanoshin                Triple::amdgcn;
47545bb48eaSTom Stellard   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
47645bb48eaSTom Stellard   bool IsRegSeq = true;
47745bb48eaSTom Stellard   unsigned NOps = N->getNumOperands();
47845bb48eaSTom Stellard   for (unsigned i = 0; i < NOps; i++) {
47945bb48eaSTom Stellard     // XXX: Why is this here?
48045bb48eaSTom Stellard     if (isa<RegisterSDNode>(N->getOperand(i))) {
48145bb48eaSTom Stellard       IsRegSeq = false;
48245bb48eaSTom Stellard       break;
48345bb48eaSTom Stellard     }
484ed3527c6SStanislav Mekhanoshin     unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
485ed3527c6SStanislav Mekhanoshin                          : R600RegisterInfo::getSubRegFromChannel(i);
48645bb48eaSTom Stellard     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
487ede0e407SSimon Pilgrim     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
48845bb48eaSTom Stellard   }
48945bb48eaSTom Stellard   if (NOps != NumVectorElts) {
49045bb48eaSTom Stellard     // Fill in the missing undef elements if this was a scalar_to_vector.
49103aa3aeeSTom Stellard     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
49245bb48eaSTom Stellard     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
49345bb48eaSTom Stellard                                                    DL, EltVT);
49445bb48eaSTom Stellard     for (unsigned i = NOps; i < NumVectorElts; ++i) {
495ed3527c6SStanislav Mekhanoshin       unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
496ed3527c6SStanislav Mekhanoshin                            : R600RegisterInfo::getSubRegFromChannel(i);
49745bb48eaSTom Stellard       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
49845bb48eaSTom Stellard       RegSeqArgs[1 + (2 * i) + 1] =
499ede0e407SSimon Pilgrim           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
50045bb48eaSTom Stellard     }
50145bb48eaSTom Stellard   }
50245bb48eaSTom Stellard 
50345bb48eaSTom Stellard   if (!IsRegSeq)
50420287697STom Stellard     SelectCode(N);
50595927c0fSJustin Bogner   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
50620287697STom Stellard }
50720287697STom Stellard 
Select(SDNode * N)50820287697STom Stellard void AMDGPUDAGToDAGISel::Select(SDNode *N) {
50920287697STom Stellard   unsigned int Opc = N->getOpcode();
51020287697STom Stellard   if (N->isMachineOpcode()) {
51120287697STom Stellard     N->setNodeId(-1);
51220287697STom Stellard     return;   // Already selected.
51320287697STom Stellard   }
51420287697STom Stellard 
5152bd166adSMatt Arsenault   // isa<MemSDNode> almost works but is slightly too permissive for some DS
5162bd166adSMatt Arsenault   // intrinsics.
5172bd166adSMatt Arsenault   if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
518d5fca554SDaniil Fukalov       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
519a5840c3cSMatt Arsenault        Opc == ISD::ATOMIC_LOAD_FADD ||
520d5fca554SDaniil Fukalov        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
521cdd45d5fSMatt Arsenault        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
522cdd191d9SMatt Arsenault     N = glueCopyToM0LDSInit(N);
5232bd166adSMatt Arsenault     SelectCode(N);
5242bd166adSMatt Arsenault     return;
5252bd166adSMatt Arsenault   }
52620287697STom Stellard 
52720287697STom Stellard   switch (Opc) {
52884445dd1SMatt Arsenault   default:
52984445dd1SMatt Arsenault     break;
53020287697STom Stellard   // We are selecting i64 ADD here instead of custom lower it during
53120287697STom Stellard   // DAG legalization, so we can fold some i64 ADDs used for address
53220287697STom Stellard   // calculation into the LOAD and STORE instructions.
53320287697STom Stellard   case ISD::ADDC:
53420287697STom Stellard   case ISD::ADDE:
53520287697STom Stellard   case ISD::SUBC:
53620287697STom Stellard   case ISD::SUBE: {
53720287697STom Stellard     if (N->getValueType(0) != MVT::i64)
53820287697STom Stellard       break;
53920287697STom Stellard 
54020287697STom Stellard     SelectADD_SUB_I64(N);
54120287697STom Stellard     return;
54220287697STom Stellard   }
5438f3da70eSStanislav Mekhanoshin   case ISD::ADDCARRY:
5448f3da70eSStanislav Mekhanoshin   case ISD::SUBCARRY:
5458f3da70eSStanislav Mekhanoshin     if (N->getValueType(0) != MVT::i32)
5468f3da70eSStanislav Mekhanoshin       break;
5478f3da70eSStanislav Mekhanoshin 
5488f3da70eSStanislav Mekhanoshin     SelectAddcSubb(N);
5498f3da70eSStanislav Mekhanoshin     return;
55020287697STom Stellard   case ISD::UADDO:
55120287697STom Stellard   case ISD::USUBO: {
55220287697STom Stellard     SelectUADDO_USUBO(N);
55320287697STom Stellard     return;
55420287697STom Stellard   }
55520287697STom Stellard   case AMDGPUISD::FMUL_W_CHAIN: {
55620287697STom Stellard     SelectFMUL_W_CHAIN(N);
55720287697STom Stellard     return;
55820287697STom Stellard   }
55920287697STom Stellard   case AMDGPUISD::FMA_W_CHAIN: {
56020287697STom Stellard     SelectFMA_W_CHAIN(N);
56120287697STom Stellard     return;
56220287697STom Stellard   }
56320287697STom Stellard 
56420287697STom Stellard   case ISD::SCALAR_TO_VECTOR:
56520287697STom Stellard   case ISD::BUILD_VECTOR: {
56620287697STom Stellard     EVT VT = N->getValueType(0);
56720287697STom Stellard     unsigned NumVectorElts = VT.getVectorNumElements();
5685a4ec812SMatt Arsenault     if (VT.getScalarSizeInBits() == 16) {
5695a4ec812SMatt Arsenault       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
570e24b34e9SMatt Arsenault         if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
571e24b34e9SMatt Arsenault           ReplaceNode(N, Packed);
57220287697STom Stellard           return;
57320287697STom Stellard         }
57420287697STom Stellard       }
57520287697STom Stellard 
57620287697STom Stellard       break;
57720287697STom Stellard     }
57820287697STom Stellard 
57903aa3aeeSTom Stellard     assert(VT.getVectorElementType().bitsEq(MVT::i32));
580658f33dcSJay Foad     unsigned RegClassID =
581658f33dcSJay Foad         SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
58220287697STom Stellard     SelectBuildVector(N, RegClassID);
58395927c0fSJustin Bogner     return;
58445bb48eaSTom Stellard   }
58545bb48eaSTom Stellard   case ISD::BUILD_PAIR: {
58645bb48eaSTom Stellard     SDValue RC, SubReg0, SubReg1;
58745bb48eaSTom Stellard     SDLoc DL(N);
58845bb48eaSTom Stellard     if (N->getValueType(0) == MVT::i128) {
58912994a70SMatt Arsenault       RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
59045bb48eaSTom Stellard       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
59145bb48eaSTom Stellard       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
59245bb48eaSTom Stellard     } else if (N->getValueType(0) == MVT::i64) {
59345bb48eaSTom Stellard       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
59445bb48eaSTom Stellard       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
59545bb48eaSTom Stellard       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
59645bb48eaSTom Stellard     } else {
59745bb48eaSTom Stellard       llvm_unreachable("Unhandled value type for BUILD_PAIR");
59845bb48eaSTom Stellard     }
59945bb48eaSTom Stellard     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
60045bb48eaSTom Stellard                             N->getOperand(1), SubReg1 };
60195927c0fSJustin Bogner     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
60295927c0fSJustin Bogner                                           N->getValueType(0), Ops));
60395927c0fSJustin Bogner     return;
60445bb48eaSTom Stellard   }
60545bb48eaSTom Stellard 
60645bb48eaSTom Stellard   case ISD::Constant:
60745bb48eaSTom Stellard   case ISD::ConstantFP: {
60820287697STom Stellard     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
60945bb48eaSTom Stellard       break;
61045bb48eaSTom Stellard 
61145bb48eaSTom Stellard     uint64_t Imm;
61245bb48eaSTom Stellard     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
61345bb48eaSTom Stellard       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
61445bb48eaSTom Stellard     else {
61545bb48eaSTom Stellard       ConstantSDNode *C = cast<ConstantSDNode>(N);
61645bb48eaSTom Stellard       Imm = C->getZExtValue();
61745bb48eaSTom Stellard     }
61845bb48eaSTom Stellard 
61945bb48eaSTom Stellard     SDLoc DL(N);
620f1c7b92aSTim Renouf     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
62195927c0fSJustin Bogner     return;
62245bb48eaSTom Stellard   }
62345bb48eaSTom Stellard   case AMDGPUISD::BFE_I32:
62445bb48eaSTom Stellard   case AMDGPUISD::BFE_U32: {
62545bb48eaSTom Stellard     // There is a scalar version available, but unlike the vector version which
62645bb48eaSTom Stellard     // has a separate operand for the offset and width, the scalar version packs
62745bb48eaSTom Stellard     // the width and offset into a single operand. Try to move to the scalar
62845bb48eaSTom Stellard     // version if the offsets are constant, so that we can try to keep extended
62945bb48eaSTom Stellard     // loads of kernel arguments in SGPRs.
63045bb48eaSTom Stellard 
63145bb48eaSTom Stellard     // TODO: Technically we could try to pattern match scalar bitshifts of
63245bb48eaSTom Stellard     // dynamic values, but it's probably not useful.
63345bb48eaSTom Stellard     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
63445bb48eaSTom Stellard     if (!Offset)
63545bb48eaSTom Stellard       break;
63645bb48eaSTom Stellard 
63745bb48eaSTom Stellard     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
63845bb48eaSTom Stellard     if (!Width)
63945bb48eaSTom Stellard       break;
64045bb48eaSTom Stellard 
64145bb48eaSTom Stellard     bool Signed = Opc == AMDGPUISD::BFE_I32;
64245bb48eaSTom Stellard 
64345bb48eaSTom Stellard     uint32_t OffsetVal = Offset->getZExtValue();
64445bb48eaSTom Stellard     uint32_t WidthVal = Width->getZExtValue();
64545bb48eaSTom Stellard 
6460a3d755eSalex-t     ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
6470a3d755eSalex-t                             WidthVal));
64895927c0fSJustin Bogner     return;
64945bb48eaSTom Stellard   }
65045bb48eaSTom Stellard   case AMDGPUISD::DIV_SCALE: {
65195927c0fSJustin Bogner     SelectDIV_SCALE(N);
65295927c0fSJustin Bogner     return;
65345bb48eaSTom Stellard   }
6544f6318feSMatt Arsenault   case AMDGPUISD::MAD_I64_I32:
6554f6318feSMatt Arsenault   case AMDGPUISD::MAD_U64_U32: {
6564f6318feSMatt Arsenault     SelectMAD_64_32(N);
6574f6318feSMatt Arsenault     return;
6584f6318feSMatt Arsenault   }
659d7e03df7SJay Foad   case ISD::SMUL_LOHI:
660d7e03df7SJay Foad   case ISD::UMUL_LOHI:
661d7e03df7SJay Foad     return SelectMUL_LOHI(N);
66245bb48eaSTom Stellard   case ISD::CopyToReg: {
66345bb48eaSTom Stellard     const SITargetLowering& Lowering =
66445bb48eaSTom Stellard       *static_cast<const SITargetLowering*>(getTargetLowering());
6650d0d6c2fSMatt Arsenault     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
66645bb48eaSTom Stellard     break;
66745bb48eaSTom Stellard   }
66845bb48eaSTom Stellard   case ISD::AND:
66945bb48eaSTom Stellard   case ISD::SRL:
67045bb48eaSTom Stellard   case ISD::SRA:
6717e8de01fSMatt Arsenault   case ISD::SIGN_EXTEND_INREG:
67220287697STom Stellard     if (N->getValueType(0) != MVT::i32)
67345bb48eaSTom Stellard       break;
67445bb48eaSTom Stellard 
67595927c0fSJustin Bogner     SelectS_BFE(N);
67695927c0fSJustin Bogner     return;
677bc4497b1STom Stellard   case ISD::BRCOND:
67895927c0fSJustin Bogner     SelectBRCOND(N);
67995927c0fSJustin Bogner     return;
680d7e2303dSMatt Arsenault   case ISD::FMAD:
6810084adc5SMatt Arsenault   case ISD::FMA:
6820084adc5SMatt Arsenault     SelectFMAD_FMA(N);
683d7e2303dSMatt Arsenault     return;
684709374d1SMatt Arsenault   case AMDGPUISD::CVT_PKRTZ_F16_F32:
685709374d1SMatt Arsenault   case AMDGPUISD::CVT_PKNORM_I16_F32:
686709374d1SMatt Arsenault   case AMDGPUISD::CVT_PKNORM_U16_F32:
687709374d1SMatt Arsenault   case AMDGPUISD::CVT_PK_U16_U32:
688709374d1SMatt Arsenault   case AMDGPUISD::CVT_PK_I16_I32: {
689709374d1SMatt Arsenault     // Hack around using a legal type if f16 is illegal.
690709374d1SMatt Arsenault     if (N->getValueType(0) == MVT::i32) {
691709374d1SMatt Arsenault       MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
692709374d1SMatt Arsenault       N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
693709374d1SMatt Arsenault                               { N->getOperand(0), N->getOperand(1) });
694709374d1SMatt Arsenault       SelectCode(N);
695709374d1SMatt Arsenault       return;
696709374d1SMatt Arsenault     }
697cdd191d9SMatt Arsenault 
698cdd191d9SMatt Arsenault     break;
699cdd191d9SMatt Arsenault   }
700cdd191d9SMatt Arsenault   case ISD::INTRINSIC_W_CHAIN: {
701cdd191d9SMatt Arsenault     SelectINTRINSIC_W_CHAIN(N);
702cdd191d9SMatt Arsenault     return;
703709374d1SMatt Arsenault   }
70400e89b42SCarl Ritson   case ISD::INTRINSIC_WO_CHAIN: {
70500e89b42SCarl Ritson     SelectINTRINSIC_WO_CHAIN(N);
70600e89b42SCarl Ritson     return;
70700e89b42SCarl Ritson   }
7084d55d024SMatt Arsenault   case ISD::INTRINSIC_VOID: {
7094d55d024SMatt Arsenault     SelectINTRINSIC_VOID(N);
7104d55d024SMatt Arsenault     return;
7114d55d024SMatt Arsenault   }
71245bb48eaSTom Stellard   }
71345bb48eaSTom Stellard 
71495927c0fSJustin Bogner   SelectCode(N);
71545bb48eaSTom Stellard }
71645bb48eaSTom Stellard 
isUniformBr(const SDNode * N) const717bc4497b1STom Stellard bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
718bc4497b1STom Stellard   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
71905b127daSNicolai Haehnle   const Instruction *Term = BB->getTerminator();
72005b127daSNicolai Haehnle   return Term->getMetadata("amdgpu.uniform") ||
72105b127daSNicolai Haehnle          Term->getMetadata("structurizecfg.uniform");
722bc4497b1STom Stellard }
723bc4497b1STom Stellard 
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const724078da26bSAbinav Puthan Purayil bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
725078da26bSAbinav Puthan Purayil                                              unsigned ShAmtBits) const {
726078da26bSAbinav Puthan Purayil   assert(N->getOpcode() == ISD::AND);
727078da26bSAbinav Puthan Purayil 
728078da26bSAbinav Puthan Purayil   const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
729078da26bSAbinav Puthan Purayil   if (RHS.countTrailingOnes() >= ShAmtBits)
730078da26bSAbinav Puthan Purayil     return true;
731078da26bSAbinav Puthan Purayil 
732078da26bSAbinav Puthan Purayil   const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
733078da26bSAbinav Puthan Purayil   return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
734078da26bSAbinav Puthan Purayil }
735078da26bSAbinav Puthan Purayil 
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)7360fd6a04bSMatt Arsenault static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
7370fd6a04bSMatt Arsenault                                           SDValue &N0, SDValue &N1) {
7380fd6a04bSMatt Arsenault   if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
7390fd6a04bSMatt Arsenault       Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
7400fd6a04bSMatt Arsenault     // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
7410fd6a04bSMatt Arsenault     // (i64 (bitcast (v2i32 (build_vector
7420fd6a04bSMatt Arsenault     //                        (or (extract_vector_elt V, 0), OFFSET),
7430fd6a04bSMatt Arsenault     //                        (extract_vector_elt V, 1)))))
7440fd6a04bSMatt Arsenault     SDValue Lo = Addr.getOperand(0).getOperand(0);
7450fd6a04bSMatt Arsenault     if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
7460fd6a04bSMatt Arsenault       SDValue BaseLo = Lo.getOperand(0);
7470fd6a04bSMatt Arsenault       SDValue BaseHi = Addr.getOperand(0).getOperand(1);
7480fd6a04bSMatt Arsenault       // Check that split base (Lo and Hi) are extracted from the same one.
7490fd6a04bSMatt Arsenault       if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7500fd6a04bSMatt Arsenault           BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7510fd6a04bSMatt Arsenault           BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
7520fd6a04bSMatt Arsenault           // Lo is statically extracted from index 0.
7530fd6a04bSMatt Arsenault           isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
7540fd6a04bSMatt Arsenault           BaseLo.getConstantOperandVal(1) == 0 &&
7550fd6a04bSMatt Arsenault           // Hi is statically extracted from index 0.
7560fd6a04bSMatt Arsenault           isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
7570fd6a04bSMatt Arsenault           BaseHi.getConstantOperandVal(1) == 1) {
7580fd6a04bSMatt Arsenault         N0 = BaseLo.getOperand(0).getOperand(0);
7590fd6a04bSMatt Arsenault         N1 = Lo.getOperand(1);
7600fd6a04bSMatt Arsenault         return true;
7610fd6a04bSMatt Arsenault       }
7620fd6a04bSMatt Arsenault     }
7630fd6a04bSMatt Arsenault   }
7640fd6a04bSMatt Arsenault   return false;
7650fd6a04bSMatt Arsenault }
7660fd6a04bSMatt Arsenault 
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const7670fd6a04bSMatt Arsenault bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
7680fd6a04bSMatt Arsenault                                                     SDValue &RHS) const {
7690fd6a04bSMatt Arsenault   if (CurDAG->isBaseWithConstantOffset(Addr)) {
7700fd6a04bSMatt Arsenault     LHS = Addr.getOperand(0);
7710fd6a04bSMatt Arsenault     RHS = Addr.getOperand(1);
7720fd6a04bSMatt Arsenault     return true;
7730fd6a04bSMatt Arsenault   }
7740fd6a04bSMatt Arsenault 
7750fd6a04bSMatt Arsenault   if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
7760fd6a04bSMatt Arsenault     assert(LHS && RHS && isa<ConstantSDNode>(RHS));
7770fd6a04bSMatt Arsenault     return true;
7780fd6a04bSMatt Arsenault   }
7790fd6a04bSMatt Arsenault 
7800fd6a04bSMatt Arsenault   return false;
7810fd6a04bSMatt Arsenault }
7820fd6a04bSMatt Arsenault 
getPassName() const783117296c0SMehdi Amini StringRef AMDGPUDAGToDAGISel::getPassName() const {
78445bb48eaSTom Stellard   return "AMDGPU DAG->DAG Pattern Instruction Selection";
78545bb48eaSTom Stellard }
78645bb48eaSTom Stellard 
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//
79045bb48eaSTom Stellard 
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)79145bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
79245bb48eaSTom Stellard                                             SDValue &Offset) {
79320287697STom Stellard   return false;
79445bb48eaSTom Stellard }
79545bb48eaSTom Stellard 
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)79645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
79745bb48eaSTom Stellard                                             SDValue &Offset) {
79845bb48eaSTom Stellard   ConstantSDNode *C;
79945bb48eaSTom Stellard   SDLoc DL(Addr);
80045bb48eaSTom Stellard 
80145bb48eaSTom Stellard   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
802c5a154dbSTom Stellard     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
80345bb48eaSTom Stellard     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80406200bd7SJan Vesely   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
80506200bd7SJan Vesely              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
806c5a154dbSTom Stellard     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
80706200bd7SJan Vesely     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80845bb48eaSTom Stellard   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
80945bb48eaSTom Stellard             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
81045bb48eaSTom Stellard     Base = Addr.getOperand(0);
81145bb48eaSTom Stellard     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
81245bb48eaSTom Stellard   } else {
81345bb48eaSTom Stellard     Base = Addr;
81445bb48eaSTom Stellard     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
81545bb48eaSTom Stellard   }
81645bb48eaSTom Stellard 
81745bb48eaSTom Stellard   return true;
81845bb48eaSTom Stellard }
81945bb48eaSTom Stellard 
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const8207cd57dcdSMatt Arsenault SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
8217cd57dcdSMatt Arsenault                                                        const SDLoc &DL) const {
8227cd57dcdSMatt Arsenault   SDNode *Mov = CurDAG->getMachineNode(
8237cd57dcdSMatt Arsenault     AMDGPU::S_MOV_B32, DL, MVT::i32,
8247cd57dcdSMatt Arsenault     CurDAG->getTargetConstant(Val, DL, MVT::i32));
8257cd57dcdSMatt Arsenault   return SDValue(Mov, 0);
8267cd57dcdSMatt Arsenault }
8277cd57dcdSMatt Arsenault 
82884445dd1SMatt Arsenault // FIXME: Should only handle addcarry/subcarry
SelectADD_SUB_I64(SDNode * N)82995927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
83045bb48eaSTom Stellard   SDLoc DL(N);
83145bb48eaSTom Stellard   SDValue LHS = N->getOperand(0);
83245bb48eaSTom Stellard   SDValue RHS = N->getOperand(1);
83345bb48eaSTom Stellard 
83467624af0SNicolai Haehnle   unsigned Opcode = N->getOpcode();
83567624af0SNicolai Haehnle   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
83667624af0SNicolai Haehnle   bool ProduceCarry =
83767624af0SNicolai Haehnle       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
83884445dd1SMatt Arsenault   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
83945bb48eaSTom Stellard 
84045bb48eaSTom Stellard   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
84145bb48eaSTom Stellard   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
84245bb48eaSTom Stellard 
84345bb48eaSTom Stellard   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84445bb48eaSTom Stellard                                        DL, MVT::i32, LHS, Sub0);
84545bb48eaSTom Stellard   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84645bb48eaSTom Stellard                                        DL, MVT::i32, LHS, Sub1);
84745bb48eaSTom Stellard 
84845bb48eaSTom Stellard   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84945bb48eaSTom Stellard                                        DL, MVT::i32, RHS, Sub0);
85045bb48eaSTom Stellard   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
85145bb48eaSTom Stellard                                        DL, MVT::i32, RHS, Sub1);
85245bb48eaSTom Stellard 
85345bb48eaSTom Stellard   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
85445bb48eaSTom Stellard 
8556e34e718Salex-t   static const unsigned OpcMap[2][2][2] = {
8566e34e718Salex-t       {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
85779f67caeSMatt Arsenault        {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
8586e34e718Salex-t       {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
8596e34e718Salex-t        {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
8606e34e718Salex-t 
8616e34e718Salex-t   unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
8626e34e718Salex-t   unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
86345bb48eaSTom Stellard 
86467624af0SNicolai Haehnle   SDNode *AddLo;
86567624af0SNicolai Haehnle   if (!ConsumeCarry) {
86667624af0SNicolai Haehnle     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
86767624af0SNicolai Haehnle     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
86867624af0SNicolai Haehnle   } else {
86967624af0SNicolai Haehnle     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
87067624af0SNicolai Haehnle     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
87167624af0SNicolai Haehnle   }
87267624af0SNicolai Haehnle   SDValue AddHiArgs[] = {
87367624af0SNicolai Haehnle     SDValue(Hi0, 0),
87467624af0SNicolai Haehnle     SDValue(Hi1, 0),
87567624af0SNicolai Haehnle     SDValue(AddLo, 1)
87667624af0SNicolai Haehnle   };
87767624af0SNicolai Haehnle   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
87845bb48eaSTom Stellard 
87967624af0SNicolai Haehnle   SDValue RegSequenceArgs[] = {
88045bb48eaSTom Stellard     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
88145bb48eaSTom Stellard     SDValue(AddLo,0),
88245bb48eaSTom Stellard     Sub0,
88345bb48eaSTom Stellard     SDValue(AddHi,0),
88445bb48eaSTom Stellard     Sub1,
88545bb48eaSTom Stellard   };
88667624af0SNicolai Haehnle   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
88767624af0SNicolai Haehnle                                                MVT::i64, RegSequenceArgs);
88867624af0SNicolai Haehnle 
88967624af0SNicolai Haehnle   if (ProduceCarry) {
89067624af0SNicolai Haehnle     // Replace the carry-use
8913264c1bdSNirav Dave     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
89267624af0SNicolai Haehnle   }
89367624af0SNicolai Haehnle 
89467624af0SNicolai Haehnle   // Replace the remaining uses.
8953264c1bdSNirav Dave   ReplaceNode(N, RegSequence);
89645bb48eaSTom Stellard }
89745bb48eaSTom Stellard 
SelectAddcSubb(SDNode * N)8988f3da70eSStanislav Mekhanoshin void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
8998f3da70eSStanislav Mekhanoshin   SDLoc DL(N);
9008f3da70eSStanislav Mekhanoshin   SDValue LHS = N->getOperand(0);
9018f3da70eSStanislav Mekhanoshin   SDValue RHS = N->getOperand(1);
9028f3da70eSStanislav Mekhanoshin   SDValue CI = N->getOperand(2);
9038f3da70eSStanislav Mekhanoshin 
9045b898bddSalex-t   if (N->isDivergent()) {
9058f3da70eSStanislav Mekhanoshin     unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
9068f3da70eSStanislav Mekhanoshin                                                    : AMDGPU::V_SUBB_U32_e64;
9078f3da70eSStanislav Mekhanoshin     CurDAG->SelectNodeTo(
9088f3da70eSStanislav Mekhanoshin         N, Opc, N->getVTList(),
9095b898bddSalex-t         {LHS, RHS, CI,
9105b898bddSalex-t          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9115b898bddSalex-t   } else {
9125b898bddSalex-t     unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
9135b898bddSalex-t                                                    : AMDGPU::S_SUB_CO_PSEUDO;
9145b898bddSalex-t     CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
9155b898bddSalex-t   }
9168f3da70eSStanislav Mekhanoshin }
9178f3da70eSStanislav Mekhanoshin 
SelectUADDO_USUBO(SDNode * N)918ee3f0acfSMatt Arsenault void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
919ee3f0acfSMatt Arsenault   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
920ee3f0acfSMatt Arsenault   // carry out despite the _i32 name. These were renamed in VI to _U32.
921ee3f0acfSMatt Arsenault   // FIXME: We should probably rename the opcodes here.
9225b898bddSalex-t   bool IsAdd = N->getOpcode() == ISD::UADDO;
9235b898bddSalex-t   bool IsVALU = N->isDivergent();
9245b898bddSalex-t 
9255b898bddSalex-t   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
9265b898bddSalex-t        ++UI)
9275b898bddSalex-t     if (UI.getUse().getResNo() == 1) {
9285b898bddSalex-t       if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
9295b898bddSalex-t           (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
9305b898bddSalex-t         IsVALU = true;
9315b898bddSalex-t         break;
9325b898bddSalex-t       }
9335b898bddSalex-t     }
9345b898bddSalex-t 
9355b898bddSalex-t   if (IsVALU) {
93679f67caeSMatt Arsenault     unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
937ee3f0acfSMatt Arsenault 
938eea5177dSMichael Liao     CurDAG->SelectNodeTo(
939eea5177dSMichael Liao         N, Opc, N->getVTList(),
940cfdfba99STim Renouf         {N->getOperand(0), N->getOperand(1),
941eea5177dSMichael Liao          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9425b898bddSalex-t   } else {
9435b898bddSalex-t     unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
9445b898bddSalex-t                                                 : AMDGPU::S_USUBO_PSEUDO;
9455b898bddSalex-t 
9465b898bddSalex-t     CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
9475b898bddSalex-t                          {N->getOperand(0), N->getOperand(1)});
9485b898bddSalex-t   }
949ee3f0acfSMatt Arsenault }
950ee3f0acfSMatt Arsenault 
SelectFMA_W_CHAIN(SDNode * N)9518485fa09STom Stellard void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
9528485fa09STom Stellard   SDLoc SL(N);
9538485fa09STom Stellard   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
9548485fa09STom Stellard   SDValue Ops[10];
9558485fa09STom Stellard 
9568485fa09STom Stellard   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
9578485fa09STom Stellard   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9588485fa09STom Stellard   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
9598485fa09STom Stellard   Ops[8] = N->getOperand(0);
9608485fa09STom Stellard   Ops[9] = N->getOperand(4);
9618485fa09STom Stellard 
962598bebeaSJay Foad   // If there are no source modifiers, prefer fmac over fma because it can use
963598bebeaSJay Foad   // the smaller VOP2 encoding.
964598bebeaSJay Foad   bool UseFMAC = Subtarget->hasDLInsts() &&
965598bebeaSJay Foad                  cast<ConstantSDNode>(Ops[0])->isZero() &&
966598bebeaSJay Foad                  cast<ConstantSDNode>(Ops[2])->isZero() &&
967598bebeaSJay Foad                  cast<ConstantSDNode>(Ops[4])->isZero();
968598bebeaSJay Foad   unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
969598bebeaSJay Foad   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
9708485fa09STom Stellard }
9718485fa09STom Stellard 
SelectFMUL_W_CHAIN(SDNode * N)9728485fa09STom Stellard void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
9738485fa09STom Stellard   SDLoc SL(N);
9748485fa09STom Stellard   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
9758485fa09STom Stellard   SDValue Ops[8];
9768485fa09STom Stellard 
9778485fa09STom Stellard   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
9788485fa09STom Stellard   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9798485fa09STom Stellard   Ops[6] = N->getOperand(0);
9808485fa09STom Stellard   Ops[7] = N->getOperand(3);
9818485fa09STom Stellard 
9828485fa09STom Stellard   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
9838485fa09STom Stellard }
9848485fa09STom Stellard 
98545bb48eaSTom Stellard // We need to handle this here because tablegen doesn't support matching
98645bb48eaSTom Stellard // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)98795927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
98845bb48eaSTom Stellard   SDLoc SL(N);
98945bb48eaSTom Stellard   EVT VT = N->getValueType(0);
99045bb48eaSTom Stellard 
99145bb48eaSTom Stellard   assert(VT == MVT::f32 || VT == MVT::f64);
99245bb48eaSTom Stellard 
99345bb48eaSTom Stellard   unsigned Opc
994314e29edSJoe Nash     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
99545bb48eaSTom Stellard 
9965b91a6a8SJay Foad   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
9975b91a6a8SJay Foad   // omod
9985b91a6a8SJay Foad   SDValue Ops[8];
9995b91a6a8SJay Foad   SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
10005b91a6a8SJay Foad   SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
10015b91a6a8SJay Foad   SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
10023b99f12aSMatt Arsenault   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
100345bb48eaSTom Stellard }
100445bb48eaSTom Stellard 
10054f6318feSMatt Arsenault // We need to handle this here because tablegen doesn't support matching
10064f6318feSMatt Arsenault // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)10074f6318feSMatt Arsenault void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
10084f6318feSMatt Arsenault   SDLoc SL(N);
10094f6318feSMatt Arsenault   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1010d943c514SJay Foad   unsigned Opc;
1011d943c514SJay Foad   if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
1012d943c514SJay Foad     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1013d943c514SJay Foad                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1014d943c514SJay Foad   else
1015d943c514SJay Foad     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10164f6318feSMatt Arsenault 
10174f6318feSMatt Arsenault   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10184f6318feSMatt Arsenault   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
10194f6318feSMatt Arsenault                     Clamp };
10204f6318feSMatt Arsenault   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10214f6318feSMatt Arsenault }
10224f6318feSMatt Arsenault 
1023d7e03df7SJay Foad // We need to handle this here because tablegen doesn't support matching
1024d7e03df7SJay Foad // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)1025d7e03df7SJay Foad void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1026d7e03df7SJay Foad   SDLoc SL(N);
1027d7e03df7SJay Foad   bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1028d943c514SJay Foad   unsigned Opc;
1029d943c514SJay Foad   if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
1030d943c514SJay Foad     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1031d943c514SJay Foad                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1032d943c514SJay Foad   else
1033d943c514SJay Foad     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1034d7e03df7SJay Foad 
1035d7e03df7SJay Foad   SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
1036d7e03df7SJay Foad   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1037d7e03df7SJay Foad   SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1038d7e03df7SJay Foad   SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1039d7e03df7SJay Foad   if (!SDValue(N, 0).use_empty()) {
1040d7e03df7SJay Foad     SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1041d7e03df7SJay Foad     SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1042d7e03df7SJay Foad                                         MVT::i32, SDValue(Mad, 0), Sub0);
1043d7e03df7SJay Foad     ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1044d7e03df7SJay Foad   }
1045d7e03df7SJay Foad   if (!SDValue(N, 1).use_empty()) {
1046d7e03df7SJay Foad     SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1047d7e03df7SJay Foad     SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1048d7e03df7SJay Foad                                         MVT::i32, SDValue(Mad, 0), Sub1);
1049d7e03df7SJay Foad     ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1050d7e03df7SJay Foad   }
1051d7e03df7SJay Foad   CurDAG->RemoveDeadNode(N);
1052d7e03df7SJay Foad }
1053d7e03df7SJay Foad 
isDSOffsetLegal(SDValue Base,unsigned Offset) const1054040c5027SJay Foad bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1055040c5027SJay Foad   if (!isUInt<16>(Offset))
105645bb48eaSTom Stellard     return false;
105745bb48eaSTom Stellard 
1058040c5027SJay Foad   if (!Base || Subtarget->hasUsableDSOffset() ||
1059706f930bSMatt Arsenault       Subtarget->unsafeDSOffsetFoldingEnabled())
106045bb48eaSTom Stellard     return true;
106145bb48eaSTom Stellard 
106245bb48eaSTom Stellard   // On Southern Islands instruction with a negative base value and an offset
106345bb48eaSTom Stellard   // don't seem to work.
106445bb48eaSTom Stellard   return CurDAG->SignBitIsZero(Base);
106545bb48eaSTom Stellard }
106645bb48eaSTom Stellard 
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const106745bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
106845bb48eaSTom Stellard                                               SDValue &Offset) const {
106992b24f32STom Stellard   SDLoc DL(Addr);
107045bb48eaSTom Stellard   if (CurDAG->isBaseWithConstantOffset(Addr)) {
107145bb48eaSTom Stellard     SDValue N0 = Addr.getOperand(0);
107245bb48eaSTom Stellard     SDValue N1 = Addr.getOperand(1);
107345bb48eaSTom Stellard     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1074040c5027SJay Foad     if (isDSOffsetLegal(N0, C1->getSExtValue())) {
107545bb48eaSTom Stellard       // (add n0, c0)
107645bb48eaSTom Stellard       Base = N0;
107792b24f32STom Stellard       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
107845bb48eaSTom Stellard       return true;
107945bb48eaSTom Stellard     }
1080966a94f8SMatt Arsenault   } else if (Addr.getOpcode() == ISD::SUB) {
1081966a94f8SMatt Arsenault     // sub C, x -> add (sub 0, x), C
1082966a94f8SMatt Arsenault     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1083966a94f8SMatt Arsenault       int64_t ByteOffset = C->getSExtValue();
1084040c5027SJay Foad       if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1085966a94f8SMatt Arsenault         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
108645bb48eaSTom Stellard 
1087966a94f8SMatt Arsenault         // XXX - This is kind of hacky. Create a dummy sub node so we can check
1088966a94f8SMatt Arsenault         // the known bits in isDSOffsetLegal. We need to emit the selected node
1089966a94f8SMatt Arsenault         // here, so this is thrown away.
1090966a94f8SMatt Arsenault         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1091966a94f8SMatt Arsenault                                       Zero, Addr.getOperand(1));
1092966a94f8SMatt Arsenault 
1093040c5027SJay Foad         if (isDSOffsetLegal(Sub, ByteOffset)) {
1094cfdfba99STim Renouf           SmallVector<SDValue, 3> Opnds;
1095cfdfba99STim Renouf           Opnds.push_back(Zero);
1096cfdfba99STim Renouf           Opnds.push_back(Addr.getOperand(1));
109784445dd1SMatt Arsenault 
1098cfdfba99STim Renouf           // FIXME: Select to VOP3 version for with-carry.
109979f67caeSMatt Arsenault           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1100cfdfba99STim Renouf           if (Subtarget->hasAddNoCarry()) {
1101cfdfba99STim Renouf             SubOp = AMDGPU::V_SUB_U32_e64;
1102eea5177dSMichael Liao             Opnds.push_back(
1103eea5177dSMichael Liao                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1104cfdfba99STim Renouf           }
1105cfdfba99STim Renouf 
1106cfdfba99STim Renouf           MachineSDNode *MachineSub =
1107cfdfba99STim Renouf               CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1108966a94f8SMatt Arsenault 
1109966a94f8SMatt Arsenault           Base = SDValue(MachineSub, 0);
111026a2ab74STom Stellard           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1111966a94f8SMatt Arsenault           return true;
1112966a94f8SMatt Arsenault         }
1113966a94f8SMatt Arsenault       }
1114966a94f8SMatt Arsenault     }
1115966a94f8SMatt Arsenault   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
111645bb48eaSTom Stellard     // If we have a constant address, prefer to put the constant into the
111745bb48eaSTom Stellard     // offset. This can save moves to load the constant address since multiple
111845bb48eaSTom Stellard     // operations can share the zero base address register, and enables merging
111945bb48eaSTom Stellard     // into read2 / write2 instructions.
1120966a94f8SMatt Arsenault 
1121966a94f8SMatt Arsenault     SDLoc DL(Addr);
1122966a94f8SMatt Arsenault 
1123040c5027SJay Foad     if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
112445bb48eaSTom Stellard       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
112545bb48eaSTom Stellard       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
112645bb48eaSTom Stellard                                  DL, MVT::i32, Zero);
112745bb48eaSTom Stellard       Base = SDValue(MovZero, 0);
112826a2ab74STom Stellard       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
112945bb48eaSTom Stellard       return true;
113045bb48eaSTom Stellard     }
113145bb48eaSTom Stellard   }
113245bb48eaSTom Stellard 
113345bb48eaSTom Stellard   // default case
113445bb48eaSTom Stellard   Base = Addr;
1135966a94f8SMatt Arsenault   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
113645bb48eaSTom Stellard   return true;
113745bb48eaSTom Stellard }
113845bb48eaSTom Stellard 
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const1139040c5027SJay Foad bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1140040c5027SJay Foad                                           unsigned Offset1,
1141040c5027SJay Foad                                           unsigned Size) const {
1142040c5027SJay Foad   if (Offset0 % Size != 0 || Offset1 % Size != 0)
1143040c5027SJay Foad     return false;
1144040c5027SJay Foad   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1145040c5027SJay Foad     return false;
1146040c5027SJay Foad 
1147040c5027SJay Foad   if (!Base || Subtarget->hasUsableDSOffset() ||
1148040c5027SJay Foad       Subtarget->unsafeDSOffsetFoldingEnabled())
1149040c5027SJay Foad     return true;
1150040c5027SJay Foad 
1151040c5027SJay Foad   // On Southern Islands instruction with a negative base value and an offset
1152040c5027SJay Foad   // don't seem to work.
1153040c5027SJay Foad   return CurDAG->SignBitIsZero(Base);
1154040c5027SJay Foad }
1155040c5027SJay Foad 
1156966a94f8SMatt Arsenault // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const115745bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
115845bb48eaSTom Stellard                                                    SDValue &Offset0,
115945bb48eaSTom Stellard                                                    SDValue &Offset1) const {
1160040c5027SJay Foad   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1161d17ea67bSMirko Brkusanin }
1162d17ea67bSMirko Brkusanin 
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const1163d17ea67bSMirko Brkusanin bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1164d17ea67bSMirko Brkusanin                                                     SDValue &Offset0,
1165d17ea67bSMirko Brkusanin                                                     SDValue &Offset1) const {
1166040c5027SJay Foad   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1167d17ea67bSMirko Brkusanin }
1168d17ea67bSMirko Brkusanin 
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const1169d17ea67bSMirko Brkusanin bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1170d17ea67bSMirko Brkusanin                                             SDValue &Offset0, SDValue &Offset1,
1171040c5027SJay Foad                                             unsigned Size) const {
117245bb48eaSTom Stellard   SDLoc DL(Addr);
117345bb48eaSTom Stellard 
117445bb48eaSTom Stellard   if (CurDAG->isBaseWithConstantOffset(Addr)) {
117545bb48eaSTom Stellard     SDValue N0 = Addr.getOperand(0);
117645bb48eaSTom Stellard     SDValue N1 = Addr.getOperand(1);
117745bb48eaSTom Stellard     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1178040c5027SJay Foad     unsigned OffsetValue0 = C1->getZExtValue();
1179040c5027SJay Foad     unsigned OffsetValue1 = OffsetValue0 + Size;
1180040c5027SJay Foad 
118145bb48eaSTom Stellard     // (add n0, c0)
1182040c5027SJay Foad     if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
118345bb48eaSTom Stellard       Base = N0;
1184040c5027SJay Foad       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1185040c5027SJay Foad       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
118645bb48eaSTom Stellard       return true;
118745bb48eaSTom Stellard     }
1188966a94f8SMatt Arsenault   } else if (Addr.getOpcode() == ISD::SUB) {
1189966a94f8SMatt Arsenault     // sub C, x -> add (sub 0, x), C
1190d17ea67bSMirko Brkusanin     if (const ConstantSDNode *C =
1191d17ea67bSMirko Brkusanin             dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1192040c5027SJay Foad       unsigned OffsetValue0 = C->getZExtValue();
1193040c5027SJay Foad       unsigned OffsetValue1 = OffsetValue0 + Size;
119445bb48eaSTom Stellard 
1195040c5027SJay Foad       if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1196966a94f8SMatt Arsenault         SDLoc DL(Addr);
1197966a94f8SMatt Arsenault         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1198966a94f8SMatt Arsenault 
1199966a94f8SMatt Arsenault         // XXX - This is kind of hacky. Create a dummy sub node so we can check
1200966a94f8SMatt Arsenault         // the known bits in isDSOffsetLegal. We need to emit the selected node
1201966a94f8SMatt Arsenault         // here, so this is thrown away.
1202d17ea67bSMirko Brkusanin         SDValue Sub =
1203d17ea67bSMirko Brkusanin             CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1204966a94f8SMatt Arsenault 
1205040c5027SJay Foad         if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1206cfdfba99STim Renouf           SmallVector<SDValue, 3> Opnds;
1207cfdfba99STim Renouf           Opnds.push_back(Zero);
1208cfdfba99STim Renouf           Opnds.push_back(Addr.getOperand(1));
120979f67caeSMatt Arsenault           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1210cfdfba99STim Renouf           if (Subtarget->hasAddNoCarry()) {
1211cfdfba99STim Renouf             SubOp = AMDGPU::V_SUB_U32_e64;
1212eea5177dSMichael Liao             Opnds.push_back(
1213eea5177dSMichael Liao                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1214cfdfba99STim Renouf           }
121584445dd1SMatt Arsenault 
1216d17ea67bSMirko Brkusanin           MachineSDNode *MachineSub = CurDAG->getMachineNode(
1217040c5027SJay Foad               SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1218966a94f8SMatt Arsenault 
1219966a94f8SMatt Arsenault           Base = SDValue(MachineSub, 0);
1220040c5027SJay Foad           Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1221040c5027SJay Foad           Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1222966a94f8SMatt Arsenault           return true;
1223966a94f8SMatt Arsenault         }
1224966a94f8SMatt Arsenault       }
1225966a94f8SMatt Arsenault     }
1226966a94f8SMatt Arsenault   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1227040c5027SJay Foad     unsigned OffsetValue0 = CAddr->getZExtValue();
1228040c5027SJay Foad     unsigned OffsetValue1 = OffsetValue0 + Size;
122945bb48eaSTom Stellard 
1230040c5027SJay Foad     if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
123145bb48eaSTom Stellard       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1232d17ea67bSMirko Brkusanin       MachineSDNode *MovZero =
1233d17ea67bSMirko Brkusanin           CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
123445bb48eaSTom Stellard       Base = SDValue(MovZero, 0);
1235040c5027SJay Foad       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1236040c5027SJay Foad       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
123745bb48eaSTom Stellard       return true;
123845bb48eaSTom Stellard     }
123945bb48eaSTom Stellard   }
124045bb48eaSTom Stellard 
124145bb48eaSTom Stellard   // default case
12420efdd06bSMatt Arsenault 
124345bb48eaSTom Stellard   Base = Addr;
124445bb48eaSTom Stellard   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
124545bb48eaSTom Stellard   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
124645bb48eaSTom Stellard   return true;
124745bb48eaSTom Stellard }
124845bb48eaSTom Stellard 
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const12493bffb1cdSStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
12503bffb1cdSStanislav Mekhanoshin                                      SDValue &SOffset, SDValue &Offset,
12513bffb1cdSStanislav Mekhanoshin                                      SDValue &Offen, SDValue &Idxen,
1252edd6da10SStanislav Mekhanoshin                                      SDValue &Addr64) const {
1253b41574a9SChangpeng Fang   // Subtarget prefers to use flat instruction
1254fdaad485SMatt Arsenault   // FIXME: This should be a pattern predicate and not reach here
1255b41574a9SChangpeng Fang   if (Subtarget->useFlatForGlobal())
1256b41574a9SChangpeng Fang     return false;
1257b41574a9SChangpeng Fang 
125845bb48eaSTom Stellard   SDLoc DL(Addr);
125945bb48eaSTom Stellard 
126045bb48eaSTom Stellard   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
126145bb48eaSTom Stellard   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
126245bb48eaSTom Stellard   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
126345bb48eaSTom Stellard   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
126445bb48eaSTom Stellard 
1265f1c7b92aSTim Renouf   ConstantSDNode *C1 = nullptr;
1266f1c7b92aSTim Renouf   SDValue N0 = Addr;
126745bb48eaSTom Stellard   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1268f1c7b92aSTim Renouf     C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1269f1c7b92aSTim Renouf     if (isUInt<32>(C1->getZExtValue()))
1270f1c7b92aSTim Renouf       N0 = Addr.getOperand(0);
1271f1c7b92aSTim Renouf     else
1272f1c7b92aSTim Renouf       C1 = nullptr;
1273f1c7b92aSTim Renouf   }
127445bb48eaSTom Stellard 
127545bb48eaSTom Stellard   if (N0.getOpcode() == ISD::ADD) {
1276f1c7b92aSTim Renouf     // (add N2, N3) -> addr64, or
127745bb48eaSTom Stellard     // (add (add N2, N3), C1) -> addr64
127845bb48eaSTom Stellard     SDValue N2 = N0.getOperand(0);
127945bb48eaSTom Stellard     SDValue N3 = N0.getOperand(1);
128045bb48eaSTom Stellard     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1281f1c7b92aSTim Renouf 
1282f1c7b92aSTim Renouf     if (N2->isDivergent()) {
1283f1c7b92aSTim Renouf       if (N3->isDivergent()) {
1284f1c7b92aSTim Renouf         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1285f1c7b92aSTim Renouf         // addr64, and construct the resource from a 0 address.
1286f1c7b92aSTim Renouf         Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1287f1c7b92aSTim Renouf         VAddr = N0;
1288f1c7b92aSTim Renouf       } else {
1289f1c7b92aSTim Renouf         // N2 is divergent, N3 is not.
1290f1c7b92aSTim Renouf         Ptr = N3;
1291f1c7b92aSTim Renouf         VAddr = N2;
1292f1c7b92aSTim Renouf       }
1293f1c7b92aSTim Renouf     } else {
1294f1c7b92aSTim Renouf       // N2 is not divergent.
129545bb48eaSTom Stellard       Ptr = N2;
129645bb48eaSTom Stellard       VAddr = N3;
1297f1c7b92aSTim Renouf     }
1298f1c7b92aSTim Renouf     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1299f1c7b92aSTim Renouf   } else if (N0->isDivergent()) {
1300f1c7b92aSTim Renouf     // N0 is divergent. Use it as the addr64, and construct the resource from a
1301f1c7b92aSTim Renouf     // 0 address.
1302f1c7b92aSTim Renouf     Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1303f1c7b92aSTim Renouf     VAddr = N0;
1304f1c7b92aSTim Renouf     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
130545bb48eaSTom Stellard   } else {
1306f1c7b92aSTim Renouf     // N0 -> offset, or
1307f1c7b92aSTim Renouf     // (N0 + C1) -> offset
130845bb48eaSTom Stellard     VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
130945bb48eaSTom Stellard     Ptr = N0;
131045bb48eaSTom Stellard   }
131145bb48eaSTom Stellard 
1312f1c7b92aSTim Renouf   if (!C1) {
1313f1c7b92aSTim Renouf     // No offset.
1314f1c7b92aSTim Renouf     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1315f1c7b92aSTim Renouf     return true;
1316f1c7b92aSTim Renouf   }
1317f1c7b92aSTim Renouf 
1318ffadcb74SMarek Olsak   if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1319f1c7b92aSTim Renouf     // Legal offset for instruction.
132045bb48eaSTom Stellard     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1321b41574a9SChangpeng Fang     return true;
132288701817SMatt Arsenault   }
132388701817SMatt Arsenault 
132445bb48eaSTom Stellard   // Illegal offset, store it in soffset.
132545bb48eaSTom Stellard   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1326f1c7b92aSTim Renouf   SOffset =
1327f1c7b92aSTim Renouf       SDValue(CurDAG->getMachineNode(
1328f1c7b92aSTim Renouf                   AMDGPU::S_MOV_B32, DL, MVT::i32,
132945bb48eaSTom Stellard                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
133045bb48eaSTom Stellard               0);
1331b41574a9SChangpeng Fang   return true;
133245bb48eaSTom Stellard }
133345bb48eaSTom Stellard 
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const133445bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
133545bb48eaSTom Stellard                                            SDValue &VAddr, SDValue &SOffset,
1336edd6da10SStanislav Mekhanoshin                                            SDValue &Offset) const {
133745bb48eaSTom Stellard   SDValue Ptr, Offen, Idxen, Addr64;
133845bb48eaSTom Stellard 
133970580f83STom Stellard   // addr64 bit was removed for volcanic islands.
1340fdaad485SMatt Arsenault   // FIXME: This should be a pattern predicate and not reach here
1341e4c2e9b0SMatt Arsenault   if (!Subtarget->hasAddr64())
134270580f83STom Stellard     return false;
134370580f83STom Stellard 
1344edd6da10SStanislav Mekhanoshin   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1345b41574a9SChangpeng Fang     return false;
134645bb48eaSTom Stellard 
134745bb48eaSTom Stellard   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
134845bb48eaSTom Stellard   if (C->getSExtValue()) {
134945bb48eaSTom Stellard     SDLoc DL(Addr);
135045bb48eaSTom Stellard 
135145bb48eaSTom Stellard     const SITargetLowering& Lowering =
135245bb48eaSTom Stellard       *static_cast<const SITargetLowering*>(getTargetLowering());
135345bb48eaSTom Stellard 
135445bb48eaSTom Stellard     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
135545bb48eaSTom Stellard     return true;
135645bb48eaSTom Stellard   }
135745bb48eaSTom Stellard 
135845bb48eaSTom Stellard   return false;
135945bb48eaSTom Stellard }
136045bb48eaSTom Stellard 
foldFrameIndex(SDValue N) const1361156d3ae0SMatt Arsenault std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
136260b1967cSScott Linder   SDLoc DL(N);
1363156d3ae0SMatt Arsenault 
13645a061041SChristudasan Devadasan   auto *FI = dyn_cast<FrameIndexSDNode>(N);
13655a061041SChristudasan Devadasan   SDValue TFI =
13665a061041SChristudasan Devadasan       FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1367156d3ae0SMatt Arsenault 
13685a061041SChristudasan Devadasan   // We rebase the base address into an absolute stack address and hence
1369ff8a1caeSChristudasan Devadasan   // use constant 0 for soffset. This value must be retained until
1370ff8a1caeSChristudasan Devadasan   // frame elimination and eliminateFrameIndex will choose the appropriate
1371ff8a1caeSChristudasan Devadasan   // frame register if need be.
13725a061041SChristudasan Devadasan   return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1373156d3ae0SMatt Arsenault }
1374156d3ae0SMatt Arsenault 
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const1375b81495dcSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1376156d3ae0SMatt Arsenault                                                  SDValue Addr, SDValue &Rsrc,
137745bb48eaSTom Stellard                                                  SDValue &VAddr, SDValue &SOffset,
137845bb48eaSTom Stellard                                                  SDValue &ImmOffset) const {
137945bb48eaSTom Stellard 
138045bb48eaSTom Stellard   SDLoc DL(Addr);
138145bb48eaSTom Stellard   MachineFunction &MF = CurDAG->getMachineFunction();
13820e3d3893SMatt Arsenault   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
138345bb48eaSTom Stellard 
13840e3d3893SMatt Arsenault   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
138545bb48eaSTom Stellard 
13860774ea26SMatt Arsenault   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1387bb10fa3aSMatt Arsenault     int64_t Imm = CAddr->getSExtValue();
1388bb10fa3aSMatt Arsenault     const int64_t NullPtr =
1389bb10fa3aSMatt Arsenault         AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1390bb10fa3aSMatt Arsenault     // Don't fold null pointer.
1391bb10fa3aSMatt Arsenault     if (Imm != NullPtr) {
13920774ea26SMatt Arsenault       SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1393bb10fa3aSMatt Arsenault       MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1394bb10fa3aSMatt Arsenault         AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
13950774ea26SMatt Arsenault       VAddr = SDValue(MovHighBits, 0);
1396156d3ae0SMatt Arsenault 
1397690f5b7aSSebastian Neubauer       SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
13980774ea26SMatt Arsenault       ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
13990774ea26SMatt Arsenault       return true;
14000774ea26SMatt Arsenault     }
1401bb10fa3aSMatt Arsenault   }
14020774ea26SMatt Arsenault 
140345bb48eaSTom Stellard   if (CurDAG->isBaseWithConstantOffset(Addr)) {
14040774ea26SMatt Arsenault     // (add n0, c1)
14050774ea26SMatt Arsenault 
140678655fcfSTom Stellard     SDValue N0 = Addr.getOperand(0);
140745bb48eaSTom Stellard     SDValue N1 = Addr.getOperand(1);
1408cd09961fSMatt Arsenault 
1409caf0ed4dSMatt Arsenault     // Offsets in vaddr must be positive if range checking is enabled.
141045b98189SMatt Arsenault     //
1411caf0ed4dSMatt Arsenault     // The total computation of vaddr + soffset + offset must not overflow.  If
1412caf0ed4dSMatt Arsenault     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
141345b98189SMatt Arsenault     // overflowing.
1414caf0ed4dSMatt Arsenault     //
1415caf0ed4dSMatt Arsenault     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1416caf0ed4dSMatt Arsenault     // always perform a range check. If a negative vaddr base index was used,
1417caf0ed4dSMatt Arsenault     // this would fail the range check. The overall address computation would
1418caf0ed4dSMatt Arsenault     // compute a valid address, but this doesn't happen due to the range
1419caf0ed4dSMatt Arsenault     // check. For out-of-bounds MUBUF loads, a 0 is returned.
1420caf0ed4dSMatt Arsenault     //
1421caf0ed4dSMatt Arsenault     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1422caf0ed4dSMatt Arsenault     // MUBUF vaddr, but not on older subtargets which can only do this if the
1423caf0ed4dSMatt Arsenault     // sign bit is known 0.
142445bb48eaSTom Stellard     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
142545b98189SMatt Arsenault     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1426caf0ed4dSMatt Arsenault         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1427caf0ed4dSMatt Arsenault          CurDAG->SignBitIsZero(N0))) {
1428156d3ae0SMatt Arsenault       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
142945bb48eaSTom Stellard       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
143045bb48eaSTom Stellard       return true;
143145bb48eaSTom Stellard     }
143245bb48eaSTom Stellard   }
143345bb48eaSTom Stellard 
143445bb48eaSTom Stellard   // (node)
1435156d3ae0SMatt Arsenault   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
143645bb48eaSTom Stellard   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
143745bb48eaSTom Stellard   return true;
143845bb48eaSTom Stellard }
143945bb48eaSTom Stellard 
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)1440690f5b7aSSebastian Neubauer static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1441690f5b7aSSebastian Neubauer   if (Val.getOpcode() != ISD::CopyFromReg)
1442690f5b7aSSebastian Neubauer     return false;
1443690f5b7aSSebastian Neubauer   auto RC =
1444690f5b7aSSebastian Neubauer       TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1445690f5b7aSSebastian Neubauer   return RC && TRI.isSGPRClass(RC);
1446690f5b7aSSebastian Neubauer }
1447690f5b7aSSebastian Neubauer 
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const1448b81495dcSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1449156d3ae0SMatt Arsenault                                                   SDValue Addr,
14500774ea26SMatt Arsenault                                                   SDValue &SRsrc,
14510774ea26SMatt Arsenault                                                   SDValue &SOffset,
14520774ea26SMatt Arsenault                                                   SDValue &Offset) const {
1453690f5b7aSSebastian Neubauer   const SIRegisterInfo *TRI =
1454690f5b7aSSebastian Neubauer       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
14550774ea26SMatt Arsenault   MachineFunction &MF = CurDAG->getMachineFunction();
14560774ea26SMatt Arsenault   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1457690f5b7aSSebastian Neubauer   SDLoc DL(Addr);
1458690f5b7aSSebastian Neubauer 
1459690f5b7aSSebastian Neubauer   // CopyFromReg <sgpr>
1460690f5b7aSSebastian Neubauer   if (IsCopyFromSGPR(*TRI, Addr)) {
1461690f5b7aSSebastian Neubauer     SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1462690f5b7aSSebastian Neubauer     SOffset = Addr;
1463690f5b7aSSebastian Neubauer     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1464690f5b7aSSebastian Neubauer     return true;
1465690f5b7aSSebastian Neubauer   }
1466690f5b7aSSebastian Neubauer 
1467690f5b7aSSebastian Neubauer   ConstantSDNode *CAddr;
1468690f5b7aSSebastian Neubauer   if (Addr.getOpcode() == ISD::ADD) {
1469690f5b7aSSebastian Neubauer     // Add (CopyFromReg <sgpr>) <constant>
1470690f5b7aSSebastian Neubauer     CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1471690f5b7aSSebastian Neubauer     if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1472690f5b7aSSebastian Neubauer       return false;
1473690f5b7aSSebastian Neubauer     if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1474690f5b7aSSebastian Neubauer       return false;
1475690f5b7aSSebastian Neubauer 
1476690f5b7aSSebastian Neubauer     SOffset = Addr.getOperand(0);
1477690f5b7aSSebastian Neubauer   } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1478690f5b7aSSebastian Neubauer              SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1479690f5b7aSSebastian Neubauer     // <constant>
1480690f5b7aSSebastian Neubauer     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1481690f5b7aSSebastian Neubauer   } else {
1482690f5b7aSSebastian Neubauer     return false;
1483690f5b7aSSebastian Neubauer   }
14840774ea26SMatt Arsenault 
14850774ea26SMatt Arsenault   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1486156d3ae0SMatt Arsenault 
14870774ea26SMatt Arsenault   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
14880774ea26SMatt Arsenault   return true;
14890774ea26SMatt Arsenault }
14900774ea26SMatt Arsenault 
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const149145bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1492edd6da10SStanislav Mekhanoshin                                            SDValue &SOffset, SDValue &Offset
1493edd6da10SStanislav Mekhanoshin                                            ) const {
149445bb48eaSTom Stellard   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
149545bb48eaSTom Stellard   const SIInstrInfo *TII =
149645bb48eaSTom Stellard     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
149745bb48eaSTom Stellard 
1498edd6da10SStanislav Mekhanoshin   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1499b41574a9SChangpeng Fang     return false;
150045bb48eaSTom Stellard 
150145bb48eaSTom Stellard   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
150245bb48eaSTom Stellard       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
150345bb48eaSTom Stellard       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
150445bb48eaSTom Stellard     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1505735f4671SChris Lattner                     APInt::getAllOnes(32).getZExtValue(); // Size
150645bb48eaSTom Stellard     SDLoc DL(Addr);
150745bb48eaSTom Stellard 
150845bb48eaSTom Stellard     const SITargetLowering& Lowering =
150945bb48eaSTom Stellard       *static_cast<const SITargetLowering*>(getTargetLowering());
151045bb48eaSTom Stellard 
151145bb48eaSTom Stellard     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
151245bb48eaSTom Stellard     return true;
151345bb48eaSTom Stellard   }
151445bb48eaSTom Stellard   return false;
151545bb48eaSTom Stellard }
151645bb48eaSTom Stellard 
15174227c62bSMatt Arsenault // Find a load or store from corresponding pattern root.
15184227c62bSMatt Arsenault // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)15194227c62bSMatt Arsenault static MemSDNode* findMemSDNode(SDNode *N) {
15204227c62bSMatt Arsenault   N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
15214227c62bSMatt Arsenault   if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
15224227c62bSMatt Arsenault     return MN;
15234227c62bSMatt Arsenault   assert(isa<BuildVectorSDNode>(N));
15244227c62bSMatt Arsenault   for (SDValue V : N->op_values())
15254227c62bSMatt Arsenault     if (MemSDNode *MN =
15264227c62bSMatt Arsenault           dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
15274227c62bSMatt Arsenault       return MN;
15284227c62bSMatt Arsenault   llvm_unreachable("cannot find MemSDNode in the pattern!");
15294227c62bSMatt Arsenault }
15304227c62bSMatt Arsenault 
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const1531cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1532cc7add52SSebastian Neubauer                                               SDValue &VAddr, SDValue &Offset,
1533cc7add52SSebastian Neubauer                                               uint64_t FlatVariant) const {
15344227c62bSMatt Arsenault   int64_t OffsetVal = 0;
15354227c62bSMatt Arsenault 
1536038d884aSStanislav Mekhanoshin   unsigned AS = findMemSDNode(N)->getAddressSpace();
1537038d884aSStanislav Mekhanoshin 
1538cc7add52SSebastian Neubauer   bool CanHaveFlatSegmentOffsetBug =
1539cc7add52SSebastian Neubauer       Subtarget->hasFlatSegmentOffsetBug() &&
1540cc7add52SSebastian Neubauer       FlatVariant == SIInstrFlags::FLAT &&
1541cc7add52SSebastian Neubauer       (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
1542cc7add52SSebastian Neubauer 
1543cc7add52SSebastian Neubauer   if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1544b1360caaSMichael Liao     SDValue N0, N1;
15450fd6a04bSMatt Arsenault     if (isBaseWithConstantOffset64(Addr, N0, N1)) {
154613c03162SSebastian Neubauer       int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
15474227c62bSMatt Arsenault 
15484227c62bSMatt Arsenault       const SIInstrInfo *TII = Subtarget->getInstrInfo();
1549cc7add52SSebastian Neubauer       if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
15504227c62bSMatt Arsenault         Addr = N0;
15514227c62bSMatt Arsenault         OffsetVal = COffsetVal;
15527cd57dcdSMatt Arsenault       } else {
15537cd57dcdSMatt Arsenault         // If the offset doesn't fit, put the low bits into the offset field and
15547cd57dcdSMatt Arsenault         // add the rest.
1555760af7a0SJay Foad         //
1556760af7a0SJay Foad         // For a FLAT instruction the hardware decides whether to access
1557760af7a0SJay Foad         // global/scratch/shared memory based on the high bits of vaddr,
1558760af7a0SJay Foad         // ignoring the offset field, so we have to ensure that when we add
1559760af7a0SJay Foad         // remainder to vaddr it still points into the same underlying object.
1560760af7a0SJay Foad         // The easiest way to do that is to make sure that we split the offset
1561760af7a0SJay Foad         // into two pieces that are both >= 0 or both <= 0.
15627cd57dcdSMatt Arsenault 
15637cd57dcdSMatt Arsenault         SDLoc DL(N);
1564e722943eSMatt Arsenault         uint64_t RemainderOffset;
15657cd57dcdSMatt Arsenault 
1566cc7add52SSebastian Neubauer         std::tie(OffsetVal, RemainderOffset) =
1567cc7add52SSebastian Neubauer             TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
15687cd57dcdSMatt Arsenault 
1569038d884aSStanislav Mekhanoshin         SDValue AddOffsetLo =
1570038d884aSStanislav Mekhanoshin             getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1571038d884aSStanislav Mekhanoshin         SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1572038d884aSStanislav Mekhanoshin 
1573038d884aSStanislav Mekhanoshin         if (Addr.getValueType().getSizeInBits() == 32) {
1574038d884aSStanislav Mekhanoshin           SmallVector<SDValue, 3> Opnds;
1575038d884aSStanislav Mekhanoshin           Opnds.push_back(N0);
1576038d884aSStanislav Mekhanoshin           Opnds.push_back(AddOffsetLo);
1577038d884aSStanislav Mekhanoshin           unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1578038d884aSStanislav Mekhanoshin           if (Subtarget->hasAddNoCarry()) {
1579038d884aSStanislav Mekhanoshin             AddOp = AMDGPU::V_ADD_U32_e64;
1580038d884aSStanislav Mekhanoshin             Opnds.push_back(Clamp);
1581038d884aSStanislav Mekhanoshin           }
1582038d884aSStanislav Mekhanoshin           Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1583038d884aSStanislav Mekhanoshin         } else {
1584b1360caaSMichael Liao           // TODO: Should this try to use a scalar add pseudo if the base address
1585b1360caaSMichael Liao           // is uniform and saddr is usable?
15867cd57dcdSMatt Arsenault           SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
15877cd57dcdSMatt Arsenault           SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
15887cd57dcdSMatt Arsenault 
1589038d884aSStanislav Mekhanoshin           SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1590038d884aSStanislav Mekhanoshin                                                 DL, MVT::i32, N0, Sub0);
1591038d884aSStanislav Mekhanoshin           SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1592038d884aSStanislav Mekhanoshin                                                 DL, MVT::i32, N0, Sub1);
15937cd57dcdSMatt Arsenault 
1594b1360caaSMichael Liao           SDValue AddOffsetHi =
1595b1360caaSMichael Liao               getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
15967cd57dcdSMatt Arsenault 
15977cd57dcdSMatt Arsenault           SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
15987cd57dcdSMatt Arsenault 
1599b1360caaSMichael Liao           SDNode *Add =
160079f67caeSMatt Arsenault               CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
16017cd57dcdSMatt Arsenault                                      {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
16027cd57dcdSMatt Arsenault 
16037cd57dcdSMatt Arsenault           SDNode *Addc = CurDAG->getMachineNode(
16047cd57dcdSMatt Arsenault               AMDGPU::V_ADDC_U32_e64, DL, VTs,
16057cd57dcdSMatt Arsenault               {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
16067cd57dcdSMatt Arsenault 
16077cd57dcdSMatt Arsenault           SDValue RegSequenceArgs[] = {
16087cd57dcdSMatt Arsenault               CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1609b1360caaSMichael Liao               SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
16107cd57dcdSMatt Arsenault 
16117cd57dcdSMatt Arsenault           Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1612b1360caaSMichael Liao                                                 MVT::i64, RegSequenceArgs),
1613b1360caaSMichael Liao                          0);
1614b1360caaSMichael Liao         }
16154227c62bSMatt Arsenault       }
16164227c62bSMatt Arsenault     }
1617038d884aSStanislav Mekhanoshin   }
16184227c62bSMatt Arsenault 
16194227c62bSMatt Arsenault   VAddr = Addr;
16204227c62bSMatt Arsenault   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
16214227c62bSMatt Arsenault   return true;
1622db7c6a87SMatt Arsenault }
1623db7c6a87SMatt Arsenault 
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1624cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1625cc7add52SSebastian Neubauer                                           SDValue &VAddr,
1626cc7add52SSebastian Neubauer                                           SDValue &Offset) const {
1627cc7add52SSebastian Neubauer   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1628cc7add52SSebastian Neubauer }
1629cc7add52SSebastian Neubauer 
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1630cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1631cc7add52SSebastian Neubauer                                             SDValue &VAddr,
1632cc7add52SSebastian Neubauer                                             SDValue &Offset) const {
1633cc7add52SSebastian Neubauer   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1634cc7add52SSebastian Neubauer }
1635cc7add52SSebastian Neubauer 
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1636cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1637cc7add52SSebastian Neubauer                                              SDValue &VAddr,
1638cc7add52SSebastian Neubauer                                              SDValue &Offset) const {
1639cc7add52SSebastian Neubauer   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1640cc7add52SSebastian Neubauer                               SIInstrFlags::FlatScratch);
1641cc7add52SSebastian Neubauer }
1642cc7add52SSebastian Neubauer 
1643e1a2f471SMatt Arsenault // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)1644e1a2f471SMatt Arsenault static SDValue matchZExtFromI32(SDValue Op) {
1645e1a2f471SMatt Arsenault   if (Op.getOpcode() != ISD::ZERO_EXTEND)
1646e1a2f471SMatt Arsenault     return SDValue();
1647e1a2f471SMatt Arsenault 
1648e1a2f471SMatt Arsenault   SDValue ExtSrc = Op.getOperand(0);
1649e1a2f471SMatt Arsenault   return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1650e1a2f471SMatt Arsenault }
1651e1a2f471SMatt Arsenault 
1652e1a2f471SMatt Arsenault // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const1653e1a2f471SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1654e1a2f471SMatt Arsenault                                            SDValue Addr,
1655e1a2f471SMatt Arsenault                                            SDValue &SAddr,
1656e1a2f471SMatt Arsenault                                            SDValue &VOffset,
1657e1a2f471SMatt Arsenault                                            SDValue &Offset) const {
1658e1a2f471SMatt Arsenault   int64_t ImmOffset = 0;
1659e1a2f471SMatt Arsenault 
1660e1a2f471SMatt Arsenault   // Match the immediate offset first, which canonically is moved as low as
1661e1a2f471SMatt Arsenault   // possible.
1662e1a2f471SMatt Arsenault 
1663d2e52eecSMatt Arsenault   SDValue LHS, RHS;
1664d2e52eecSMatt Arsenault   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1665e1a2f471SMatt Arsenault     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1666e1a2f471SMatt Arsenault     const SIInstrInfo *TII = Subtarget->getInstrInfo();
1667e1a2f471SMatt Arsenault 
1668cc7add52SSebastian Neubauer     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1669cc7add52SSebastian Neubauer                                SIInstrFlags::FlatGlobal)) {
1670e1a2f471SMatt Arsenault       Addr = LHS;
1671e1a2f471SMatt Arsenault       ImmOffset = COffsetVal;
1672909a5ccfSStanislav Mekhanoshin     } else if (!LHS->isDivergent()) {
1673909a5ccfSStanislav Mekhanoshin       if (COffsetVal > 0) {
1674a6e353b1SMatt Arsenault         SDLoc SL(N);
1675909a5ccfSStanislav Mekhanoshin         // saddr + large_offset -> saddr +
1676909a5ccfSStanislav Mekhanoshin         //                         (voffset = large_offset & ~MaxOffset) +
1677a6e353b1SMatt Arsenault         //                         (large_offset & MaxOffset);
1678a6e353b1SMatt Arsenault         int64_t SplitImmOffset, RemainderOffset;
1679cc7add52SSebastian Neubauer         std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1680cc7add52SSebastian Neubauer             COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1681a6e353b1SMatt Arsenault 
1682a6e353b1SMatt Arsenault         if (isUInt<32>(RemainderOffset)) {
1683a6e353b1SMatt Arsenault           SDNode *VMov = CurDAG->getMachineNode(
1684a6e353b1SMatt Arsenault               AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1685a6e353b1SMatt Arsenault               CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1686a6e353b1SMatt Arsenault           VOffset = SDValue(VMov, 0);
1687a6e353b1SMatt Arsenault           SAddr = LHS;
1688a6e353b1SMatt Arsenault           Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1689a6e353b1SMatt Arsenault           return true;
1690a6e353b1SMatt Arsenault         }
1691e1a2f471SMatt Arsenault       }
1692909a5ccfSStanislav Mekhanoshin 
1693909a5ccfSStanislav Mekhanoshin       // We are adding a 64 bit SGPR and a constant. If constant bus limit
1694909a5ccfSStanislav Mekhanoshin       // is 1 we would need to perform 1 or 2 extra moves for each half of
1695909a5ccfSStanislav Mekhanoshin       // the constant and it is better to do a scalar add and then issue a
1696909a5ccfSStanislav Mekhanoshin       // single VALU instruction to materialize zero. Otherwise it is less
1697909a5ccfSStanislav Mekhanoshin       // instructions to perform VALU adds with immediates or inline literals.
1698909a5ccfSStanislav Mekhanoshin       unsigned NumLiterals =
1699909a5ccfSStanislav Mekhanoshin           !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1700909a5ccfSStanislav Mekhanoshin           !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1701909a5ccfSStanislav Mekhanoshin       if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1702909a5ccfSStanislav Mekhanoshin         return false;
1703909a5ccfSStanislav Mekhanoshin     }
1704e1a2f471SMatt Arsenault   }
1705e1a2f471SMatt Arsenault 
1706e1a2f471SMatt Arsenault   // Match the variable offset.
1707909a5ccfSStanislav Mekhanoshin   if (Addr.getOpcode() == ISD::ADD) {
1708d2e52eecSMatt Arsenault     LHS = Addr.getOperand(0);
1709d2e52eecSMatt Arsenault     RHS = Addr.getOperand(1);
1710e1a2f471SMatt Arsenault 
1711e1a2f471SMatt Arsenault     if (!LHS->isDivergent()) {
1712e1a2f471SMatt Arsenault       // add (i64 sgpr), (zero_extend (i32 vgpr))
1713e1a2f471SMatt Arsenault       if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1714e1a2f471SMatt Arsenault         SAddr = LHS;
1715e1a2f471SMatt Arsenault         VOffset = ZextRHS;
1716e1a2f471SMatt Arsenault       }
1717e1a2f471SMatt Arsenault     }
1718e1a2f471SMatt Arsenault 
1719e1a2f471SMatt Arsenault     if (!SAddr && !RHS->isDivergent()) {
1720e1a2f471SMatt Arsenault       // add (zero_extend (i32 vgpr)), (i64 sgpr)
1721e1a2f471SMatt Arsenault       if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1722e1a2f471SMatt Arsenault         SAddr = RHS;
1723e1a2f471SMatt Arsenault         VOffset = ZextLHS;
1724e1a2f471SMatt Arsenault       }
1725e1a2f471SMatt Arsenault     }
1726e1a2f471SMatt Arsenault 
1727909a5ccfSStanislav Mekhanoshin     if (SAddr) {
1728909a5ccfSStanislav Mekhanoshin       Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1729909a5ccfSStanislav Mekhanoshin       return true;
1730909a5ccfSStanislav Mekhanoshin     }
1731909a5ccfSStanislav Mekhanoshin   }
1732909a5ccfSStanislav Mekhanoshin 
1733909a5ccfSStanislav Mekhanoshin   if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1734909a5ccfSStanislav Mekhanoshin       isa<ConstantSDNode>(Addr))
1735e1a2f471SMatt Arsenault     return false;
1736e1a2f471SMatt Arsenault 
1737909a5ccfSStanislav Mekhanoshin   // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1738909a5ccfSStanislav Mekhanoshin   // moves required to copy a 64-bit SGPR to VGPR.
1739909a5ccfSStanislav Mekhanoshin   SAddr = Addr;
1740909a5ccfSStanislav Mekhanoshin   SDNode *VMov =
1741909a5ccfSStanislav Mekhanoshin       CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1742909a5ccfSStanislav Mekhanoshin                              CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1743909a5ccfSStanislav Mekhanoshin   VOffset = SDValue(VMov, 0);
1744e1a2f471SMatt Arsenault   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1745e1a2f471SMatt Arsenault   return true;
1746e1a2f471SMatt Arsenault }
1747e1a2f471SMatt Arsenault 
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)1748b7ebb25eSStanislav Mekhanoshin static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1749b7ebb25eSStanislav Mekhanoshin   if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1750b7ebb25eSStanislav Mekhanoshin     SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1751b7ebb25eSStanislav Mekhanoshin   } else if (SAddr.getOpcode() == ISD::ADD &&
1752b7ebb25eSStanislav Mekhanoshin              isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1753b7ebb25eSStanislav Mekhanoshin     // Materialize this into a scalar move for scalar address to avoid
1754b7ebb25eSStanislav Mekhanoshin     // readfirstlane.
1755b7ebb25eSStanislav Mekhanoshin     auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1756b7ebb25eSStanislav Mekhanoshin     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1757b7ebb25eSStanislav Mekhanoshin                                               FI->getValueType(0));
175896e1fcb1SSebastian Neubauer     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1759b7ebb25eSStanislav Mekhanoshin                                            MVT::i32, TFI, SAddr.getOperand(1)),
1760b7ebb25eSStanislav Mekhanoshin                     0);
1761b7ebb25eSStanislav Mekhanoshin   }
1762b7ebb25eSStanislav Mekhanoshin 
1763b7ebb25eSStanislav Mekhanoshin   return SAddr;
1764b7ebb25eSStanislav Mekhanoshin }
1765b7ebb25eSStanislav Mekhanoshin 
1766038d884aSStanislav Mekhanoshin // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const1767690f5b7aSSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1768038d884aSStanislav Mekhanoshin                                             SDValue &SAddr,
1769038d884aSStanislav Mekhanoshin                                             SDValue &Offset) const {
1770038d884aSStanislav Mekhanoshin   if (Addr->isDivergent())
1771038d884aSStanislav Mekhanoshin     return false;
1772038d884aSStanislav Mekhanoshin 
1773690f5b7aSSebastian Neubauer   SDLoc DL(Addr);
1774690f5b7aSSebastian Neubauer 
1775038d884aSStanislav Mekhanoshin   int64_t COffsetVal = 0;
1776038d884aSStanislav Mekhanoshin 
1777038d884aSStanislav Mekhanoshin   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1778038d884aSStanislav Mekhanoshin     COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1779038d884aSStanislav Mekhanoshin     SAddr = Addr.getOperand(0);
1780690f5b7aSSebastian Neubauer   } else {
1781690f5b7aSSebastian Neubauer     SAddr = Addr;
1782038d884aSStanislav Mekhanoshin   }
1783038d884aSStanislav Mekhanoshin 
1784b7ebb25eSStanislav Mekhanoshin   SAddr = SelectSAddrFI(CurDAG, SAddr);
1785038d884aSStanislav Mekhanoshin 
1786038d884aSStanislav Mekhanoshin   const SIInstrInfo *TII = Subtarget->getInstrInfo();
1787038d884aSStanislav Mekhanoshin 
1788cc7add52SSebastian Neubauer   if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1789cc7add52SSebastian Neubauer                               SIInstrFlags::FlatScratch)) {
179013c03162SSebastian Neubauer     int64_t SplitImmOffset, RemainderOffset;
179113c03162SSebastian Neubauer     std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
179213c03162SSebastian Neubauer         COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1793038d884aSStanislav Mekhanoshin 
179413c03162SSebastian Neubauer     COffsetVal = SplitImmOffset;
1795038d884aSStanislav Mekhanoshin 
1796038d884aSStanislav Mekhanoshin     SDValue AddOffset =
1797690f5b7aSSebastian Neubauer         SAddr.getOpcode() == ISD::TargetFrameIndex
1798690f5b7aSSebastian Neubauer             ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1799690f5b7aSSebastian Neubauer             : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
180096e1fcb1SSebastian Neubauer     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
180196e1fcb1SSebastian Neubauer                                            SAddr, AddOffset),
180296e1fcb1SSebastian Neubauer                     0);
1803038d884aSStanislav Mekhanoshin   }
1804038d884aSStanislav Mekhanoshin 
1805690f5b7aSSebastian Neubauer   Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1806038d884aSStanislav Mekhanoshin 
1807038d884aSStanislav Mekhanoshin   return true;
1808038d884aSStanislav Mekhanoshin }
1809038d884aSStanislav Mekhanoshin 
18107b9f620eSJay Foad // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const18117b9f620eSJay Foad bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
18127b9f620eSJay Foad     SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
18137b9f620eSJay Foad   if (!Subtarget->hasFlatScratchSVSSwizzleBug())
18147b9f620eSJay Foad     return false;
18157b9f620eSJay Foad 
18167b9f620eSJay Foad   // The bug affects the swizzling of SVS accesses if there is any carry out
18177b9f620eSJay Foad   // from the two low order bits (i.e. from bit 1 into bit 2) when adding
18187b9f620eSJay Foad   // voffset to (soffset + inst_offset).
18197b9f620eSJay Foad   KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
18207b9f620eSJay Foad   KnownBits SKnown = KnownBits::computeForAddSub(
18217b9f620eSJay Foad       true, false, CurDAG->computeKnownBits(SAddr),
18227b9f620eSJay Foad       KnownBits::makeConstant(APInt(32, ImmOffset)));
18237b9f620eSJay Foad   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
18247b9f620eSJay Foad   uint64_t SMax = SKnown.getMaxValue().getZExtValue();
18257b9f620eSJay Foad   return (VMax & 3) + (SMax & 3) >= 4;
18267b9f620eSJay Foad }
18277b9f620eSJay Foad 
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const182836fe3f13SStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
182936fe3f13SStanislav Mekhanoshin                                              SDValue &VAddr, SDValue &SAddr,
183036fe3f13SStanislav Mekhanoshin                                              SDValue &Offset) const  {
183136fe3f13SStanislav Mekhanoshin   int64_t ImmOffset = 0;
183236fe3f13SStanislav Mekhanoshin 
183336fe3f13SStanislav Mekhanoshin   SDValue LHS, RHS;
183436fe3f13SStanislav Mekhanoshin   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
183536fe3f13SStanislav Mekhanoshin     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
183636fe3f13SStanislav Mekhanoshin     const SIInstrInfo *TII = Subtarget->getInstrInfo();
183736fe3f13SStanislav Mekhanoshin 
183836fe3f13SStanislav Mekhanoshin     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
183936fe3f13SStanislav Mekhanoshin       Addr = LHS;
184036fe3f13SStanislav Mekhanoshin       ImmOffset = COffsetVal;
184136fe3f13SStanislav Mekhanoshin     } else if (!LHS->isDivergent() && COffsetVal > 0) {
184236fe3f13SStanislav Mekhanoshin       SDLoc SL(N);
184336fe3f13SStanislav Mekhanoshin       // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
184436fe3f13SStanislav Mekhanoshin       //                         (large_offset & MaxOffset);
184536fe3f13SStanislav Mekhanoshin       int64_t SplitImmOffset, RemainderOffset;
184636fe3f13SStanislav Mekhanoshin       std::tie(SplitImmOffset, RemainderOffset)
184736fe3f13SStanislav Mekhanoshin         = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
184836fe3f13SStanislav Mekhanoshin 
184936fe3f13SStanislav Mekhanoshin       if (isUInt<32>(RemainderOffset)) {
185036fe3f13SStanislav Mekhanoshin         SDNode *VMov = CurDAG->getMachineNode(
185136fe3f13SStanislav Mekhanoshin           AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
185236fe3f13SStanislav Mekhanoshin           CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
185336fe3f13SStanislav Mekhanoshin         VAddr = SDValue(VMov, 0);
185436fe3f13SStanislav Mekhanoshin         SAddr = LHS;
18557b9f620eSJay Foad         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
18567b9f620eSJay Foad           return false;
185736fe3f13SStanislav Mekhanoshin         Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
185836fe3f13SStanislav Mekhanoshin         return true;
185936fe3f13SStanislav Mekhanoshin       }
186036fe3f13SStanislav Mekhanoshin     }
186136fe3f13SStanislav Mekhanoshin   }
186236fe3f13SStanislav Mekhanoshin 
186336fe3f13SStanislav Mekhanoshin   if (Addr.getOpcode() != ISD::ADD)
186436fe3f13SStanislav Mekhanoshin     return false;
186536fe3f13SStanislav Mekhanoshin 
186636fe3f13SStanislav Mekhanoshin   LHS = Addr.getOperand(0);
186736fe3f13SStanislav Mekhanoshin   RHS = Addr.getOperand(1);
186836fe3f13SStanislav Mekhanoshin 
186936fe3f13SStanislav Mekhanoshin   if (!LHS->isDivergent() && RHS->isDivergent()) {
187036fe3f13SStanislav Mekhanoshin     SAddr = LHS;
187136fe3f13SStanislav Mekhanoshin     VAddr = RHS;
187236fe3f13SStanislav Mekhanoshin   } else if (!RHS->isDivergent() && LHS->isDivergent()) {
187336fe3f13SStanislav Mekhanoshin     SAddr = RHS;
187436fe3f13SStanislav Mekhanoshin     VAddr = LHS;
187536fe3f13SStanislav Mekhanoshin   } else {
187636fe3f13SStanislav Mekhanoshin     return false;
187736fe3f13SStanislav Mekhanoshin   }
187836fe3f13SStanislav Mekhanoshin 
18797b9f620eSJay Foad   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
18807b9f620eSJay Foad     return false;
188136fe3f13SStanislav Mekhanoshin   SAddr = SelectSAddrFI(CurDAG, SAddr);
188236fe3f13SStanislav Mekhanoshin   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
188336fe3f13SStanislav Mekhanoshin   return true;
188436fe3f13SStanislav Mekhanoshin }
188536fe3f13SStanislav Mekhanoshin 
18864696a33dSIvan Kosarev // Match an immediate (if Imm is true) or an SGPR (if Imm is false)
18874696a33dSIvan Kosarev // offset. If Imm32Only is true, match only 32-bit immediate offsets
18884696a33dSIvan Kosarev // available on CI.
SelectSMRDOffset(SDValue Addr,SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const1889432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
1890432cbd78SIvan Kosarev                                           SDValue *SOffset, SDValue *Offset,
18914696a33dSIvan Kosarev                                           bool Imm32Only) const {
1892dee26a28STom Stellard   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
189377ce2e21SJakub Kuderski   if (!C) {
1894432cbd78SIvan Kosarev     if (!SOffset)
18954696a33dSIvan Kosarev       return false;
189677ce2e21SJakub Kuderski     if (ByteOffsetNode.getValueType().isScalarInteger() &&
189777ce2e21SJakub Kuderski         ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1898432cbd78SIvan Kosarev       *SOffset = ByteOffsetNode;
189977ce2e21SJakub Kuderski       return true;
190077ce2e21SJakub Kuderski     }
190177ce2e21SJakub Kuderski     if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
190277ce2e21SJakub Kuderski       if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1903432cbd78SIvan Kosarev         *SOffset = ByteOffsetNode.getOperand(0);
190477ce2e21SJakub Kuderski         return true;
190577ce2e21SJakub Kuderski       }
190677ce2e21SJakub Kuderski     }
1907dee26a28STom Stellard     return false;
190877ce2e21SJakub Kuderski   }
1909dee26a28STom Stellard 
1910dee26a28STom Stellard   SDLoc SL(ByteOffsetNode);
191130f18ed3SAustin Kerbow   // GFX9 and GFX10 have signed byte immediate offsets.
191230f18ed3SAustin Kerbow   int64_t ByteOffset = C->getSExtValue();
19130426c2d0SMatt Arsenault   Optional<int64_t> EncodedOffset =
191430f18ed3SAustin Kerbow       AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1915432cbd78SIvan Kosarev   if (EncodedOffset && Offset && !Imm32Only) {
1916432cbd78SIvan Kosarev     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1917dee26a28STom Stellard     return true;
1918dee26a28STom Stellard   }
1919dee26a28STom Stellard 
192030f18ed3SAustin Kerbow   // SGPR and literal offsets are unsigned.
192130f18ed3SAustin Kerbow   if (ByteOffset < 0)
192230f18ed3SAustin Kerbow     return false;
192330f18ed3SAustin Kerbow 
192430f18ed3SAustin Kerbow   EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1925432cbd78SIvan Kosarev   if (EncodedOffset && Offset && Imm32Only) {
1926432cbd78SIvan Kosarev     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
19270426c2d0SMatt Arsenault     return true;
19280426c2d0SMatt Arsenault   }
19290426c2d0SMatt Arsenault 
19300426c2d0SMatt Arsenault   if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1931217361c3STom Stellard     return false;
1932217361c3STom Stellard 
1933432cbd78SIvan Kosarev   if (SOffset) {
1934dee26a28STom Stellard     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1935432cbd78SIvan Kosarev     *SOffset = SDValue(
19360426c2d0SMatt Arsenault         CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1937dee26a28STom Stellard     return true;
1938dee26a28STom Stellard   }
1939dee26a28STom Stellard 
19404696a33dSIvan Kosarev   return false;
19414696a33dSIvan Kosarev }
19424696a33dSIvan Kosarev 
Expand32BitAddress(SDValue Addr) const1943923712b6SMatt Arsenault SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1944923712b6SMatt Arsenault   if (Addr.getValueType() != MVT::i32)
1945923712b6SMatt Arsenault     return Addr;
1946923712b6SMatt Arsenault 
1947923712b6SMatt Arsenault   // Zero-extend a 32-bit address.
1948923712b6SMatt Arsenault   SDLoc SL(Addr);
1949923712b6SMatt Arsenault 
1950923712b6SMatt Arsenault   const MachineFunction &MF = CurDAG->getMachineFunction();
1951923712b6SMatt Arsenault   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1952923712b6SMatt Arsenault   unsigned AddrHiVal = Info->get32BitAddressHighBits();
1953923712b6SMatt Arsenault   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1954923712b6SMatt Arsenault 
1955923712b6SMatt Arsenault   const SDValue Ops[] = {
1956923712b6SMatt Arsenault     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1957923712b6SMatt Arsenault     Addr,
1958923712b6SMatt Arsenault     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1959923712b6SMatt Arsenault     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1960923712b6SMatt Arsenault             0),
1961923712b6SMatt Arsenault     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1962923712b6SMatt Arsenault   };
1963923712b6SMatt Arsenault 
1964923712b6SMatt Arsenault   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1965923712b6SMatt Arsenault                                         Ops), 0);
1966923712b6SMatt Arsenault }
1967923712b6SMatt Arsenault 
19684696a33dSIvan Kosarev // Match a base and an immediate (if Imm is true) or an SGPR
19694696a33dSIvan Kosarev // (if Imm is false) offset. If Imm32Only is true, match only 32-bit
19704696a33dSIvan Kosarev // immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const1971432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
1972432cbd78SIvan Kosarev                                               SDValue *SOffset, SDValue *Offset,
19734696a33dSIvan Kosarev                                               bool Imm32Only) const {
1974dee26a28STom Stellard   SDLoc SL(Addr);
1975923712b6SMatt Arsenault 
1976432cbd78SIvan Kosarev   if (SOffset && Offset) {
1977432cbd78SIvan Kosarev     assert(!Imm32Only);
1978432cbd78SIvan Kosarev     SDValue B;
1979432cbd78SIvan Kosarev     return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
1980432cbd78SIvan Kosarev            SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
1981432cbd78SIvan Kosarev   }
1982432cbd78SIvan Kosarev 
19833fc2079cSMarek Olsak   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
19843fc2079cSMarek Olsak   // wraparound, because s_load instructions perform the addition in 64 bits.
19853fc2079cSMarek Olsak   if ((Addr.getValueType() != MVT::i32 ||
1986b1360caaSMichael Liao        Addr->getFlags().hasNoUnsignedWrap())) {
1987b1360caaSMichael Liao     SDValue N0, N1;
1988b1360caaSMichael Liao     // Extract the base and offset if possible.
1989b1360caaSMichael Liao     if (CurDAG->isBaseWithConstantOffset(Addr) ||
1990b1360caaSMichael Liao         Addr.getOpcode() == ISD::ADD) {
1991b1360caaSMichael Liao       N0 = Addr.getOperand(0);
1992b1360caaSMichael Liao       N1 = Addr.getOperand(1);
1993b1360caaSMichael Liao     } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1994b1360caaSMichael Liao       assert(N0 && N1 && isa<ConstantSDNode>(N1));
1995b1360caaSMichael Liao     }
1996b1360caaSMichael Liao     if (N0 && N1) {
1997432cbd78SIvan Kosarev       if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
1998432cbd78SIvan Kosarev         SBase = N0;
1999dee26a28STom Stellard         return true;
2000dee26a28STom Stellard       }
2001432cbd78SIvan Kosarev       if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
2002432cbd78SIvan Kosarev         SBase = N1;
20039c66c02eSIvan Kosarev         return true;
20049c66c02eSIvan Kosarev       }
2005dee26a28STom Stellard     }
20064696a33dSIvan Kosarev     return false;
2007b1360caaSMichael Liao   }
2008432cbd78SIvan Kosarev   if (Offset && !SOffset) {
2009432cbd78SIvan Kosarev     SBase = Addr;
2010432cbd78SIvan Kosarev     *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2011432cbd78SIvan Kosarev     return true;
2012432cbd78SIvan Kosarev   }
20134696a33dSIvan Kosarev   return false;
2014432cbd78SIvan Kosarev }
2015432cbd78SIvan Kosarev 
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2016432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2017432cbd78SIvan Kosarev                                     SDValue *SOffset, SDValue *Offset,
2018432cbd78SIvan Kosarev                                     bool Imm32Only) const {
2019432cbd78SIvan Kosarev   if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
2020432cbd78SIvan Kosarev     return false;
2021432cbd78SIvan Kosarev   SBase = Expand32BitAddress(SBase);
2022dee26a28STom Stellard   return true;
2023dee26a28STom Stellard }
2024dee26a28STom Stellard 
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const2025dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2026dee26a28STom Stellard                                        SDValue &Offset) const {
2027432cbd78SIvan Kosarev   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
20288973a0a2SMarek Olsak }
2029dee26a28STom Stellard 
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const20308973a0a2SMarek Olsak bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
20318973a0a2SMarek Olsak                                          SDValue &Offset) const {
2032f7521dc2SMatt Arsenault   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2033432cbd78SIvan Kosarev   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2034432cbd78SIvan Kosarev                     /* Imm32Only */ true);
2035217361c3STom Stellard }
2036217361c3STom Stellard 
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const2037dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2038432cbd78SIvan Kosarev                                         SDValue &SOffset) const {
2039432cbd78SIvan Kosarev   return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2040432cbd78SIvan Kosarev }
2041432cbd78SIvan Kosarev 
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2042432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2043432cbd78SIvan Kosarev                                            SDValue &SOffset,
2044dee26a28STom Stellard                                            SDValue &Offset) const {
2045432cbd78SIvan Kosarev   return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2046dee26a28STom Stellard }
2047dee26a28STom Stellard 
SelectSMRDBufferImm(SDValue Addr,SDValue & Offset) const2048dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2049dee26a28STom Stellard                                              SDValue &Offset) const {
205075fcdfa1SMatt Arsenault   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
205130f18ed3SAustin Kerbow     // The immediate offset for S_BUFFER instructions is unsigned.
205230f18ed3SAustin Kerbow     if (auto Imm =
205330f18ed3SAustin Kerbow             AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
205475fcdfa1SMatt Arsenault       Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
205575fcdfa1SMatt Arsenault       return true;
205675fcdfa1SMatt Arsenault     }
205775fcdfa1SMatt Arsenault   }
205875fcdfa1SMatt Arsenault 
205975fcdfa1SMatt Arsenault   return false;
20608973a0a2SMarek Olsak }
2061dee26a28STom Stellard 
SelectSMRDBufferImm32(SDValue Addr,SDValue & Offset) const20628973a0a2SMarek Olsak bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
20638973a0a2SMarek Olsak                                                SDValue &Offset) const {
2064f7521dc2SMatt Arsenault   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
20658973a0a2SMarek Olsak 
206675fcdfa1SMatt Arsenault   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
206775fcdfa1SMatt Arsenault     if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
206875fcdfa1SMatt Arsenault                                                          C->getZExtValue())) {
206975fcdfa1SMatt Arsenault       Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
207075fcdfa1SMatt Arsenault       return true;
207175fcdfa1SMatt Arsenault     }
207275fcdfa1SMatt Arsenault   }
2073217361c3STom Stellard 
207475fcdfa1SMatt Arsenault   return false;
2075217361c3STom Stellard }
2076217361c3STom Stellard 
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const20777968c345SNicolai Haehnle bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
20781322b6f8SMatt Arsenault                                             SDValue &Base,
20797968c345SNicolai Haehnle                                             SDValue &Offset) const {
20801322b6f8SMatt Arsenault   SDLoc DL(Index);
20811322b6f8SMatt Arsenault 
20821322b6f8SMatt Arsenault   if (CurDAG->isBaseWithConstantOffset(Index)) {
20831322b6f8SMatt Arsenault     SDValue N0 = Index.getOperand(0);
20841322b6f8SMatt Arsenault     SDValue N1 = Index.getOperand(1);
20851322b6f8SMatt Arsenault     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
20861322b6f8SMatt Arsenault 
20871322b6f8SMatt Arsenault     // (add n0, c0)
20886f539294SChangpeng Fang     // Don't peel off the offset (c0) if doing so could possibly lead
20896f539294SChangpeng Fang     // the base (n0) to be negative.
2090591b029fSStanislav Mekhanoshin     // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2091591b029fSStanislav Mekhanoshin     if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2092591b029fSStanislav Mekhanoshin         (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
20931322b6f8SMatt Arsenault       Base = N0;
20941322b6f8SMatt Arsenault       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
20951322b6f8SMatt Arsenault       return true;
20961322b6f8SMatt Arsenault     }
20976f539294SChangpeng Fang   }
20981322b6f8SMatt Arsenault 
20991322b6f8SMatt Arsenault   if (isa<ConstantSDNode>(Index))
21001322b6f8SMatt Arsenault     return false;
21011322b6f8SMatt Arsenault 
21021322b6f8SMatt Arsenault   Base = Index;
21031322b6f8SMatt Arsenault   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
21041322b6f8SMatt Arsenault   return true;
21051322b6f8SMatt Arsenault }
21061322b6f8SMatt Arsenault 
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)21070a3d755eSalex-t SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2108bdc4956bSBenjamin Kramer                                      SDValue Val, uint32_t Offset,
2109bdc4956bSBenjamin Kramer                                      uint32_t Width) {
21100a3d755eSalex-t   if (Val->isDivergent()) {
21110a3d755eSalex-t     unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
21120a3d755eSalex-t     SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
21130a3d755eSalex-t     SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
21140a3d755eSalex-t 
21150a3d755eSalex-t     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
21160a3d755eSalex-t   }
21170a3d755eSalex-t   unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
211845bb48eaSTom Stellard   // Transformation function, pack the offset and width of a BFE into
211945bb48eaSTom Stellard   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
212045bb48eaSTom Stellard   // source, bits [5:0] contain the offset and bits [22:16] the width.
212145bb48eaSTom Stellard   uint32_t PackedVal = Offset | (Width << 16);
212245bb48eaSTom Stellard   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
212345bb48eaSTom Stellard 
212445bb48eaSTom Stellard   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
212545bb48eaSTom Stellard }
212645bb48eaSTom Stellard 
SelectS_BFEFromShifts(SDNode * N)212795927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
212845bb48eaSTom Stellard   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
212945bb48eaSTom Stellard   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
213045bb48eaSTom Stellard   // Predicate: 0 < b <= c < 32
213145bb48eaSTom Stellard 
213245bb48eaSTom Stellard   const SDValue &Shl = N->getOperand(0);
213345bb48eaSTom Stellard   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
213445bb48eaSTom Stellard   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
213545bb48eaSTom Stellard 
213645bb48eaSTom Stellard   if (B && C) {
213745bb48eaSTom Stellard     uint32_t BVal = B->getZExtValue();
213845bb48eaSTom Stellard     uint32_t CVal = C->getZExtValue();
213945bb48eaSTom Stellard 
214045bb48eaSTom Stellard     if (0 < BVal && BVal <= CVal && CVal < 32) {
214145bb48eaSTom Stellard       bool Signed = N->getOpcode() == ISD::SRA;
21420a3d755eSalex-t       ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
214395927c0fSJustin Bogner                   32 - CVal));
214495927c0fSJustin Bogner       return;
214545bb48eaSTom Stellard     }
214645bb48eaSTom Stellard   }
214795927c0fSJustin Bogner   SelectCode(N);
214845bb48eaSTom Stellard }
214945bb48eaSTom Stellard 
SelectS_BFE(SDNode * N)215095927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
215145bb48eaSTom Stellard   switch (N->getOpcode()) {
215245bb48eaSTom Stellard   case ISD::AND:
215345bb48eaSTom Stellard     if (N->getOperand(0).getOpcode() == ISD::SRL) {
215445bb48eaSTom Stellard       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
215545bb48eaSTom Stellard       // Predicate: isMask(mask)
215645bb48eaSTom Stellard       const SDValue &Srl = N->getOperand(0);
215745bb48eaSTom Stellard       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
215845bb48eaSTom Stellard       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
215945bb48eaSTom Stellard 
216045bb48eaSTom Stellard       if (Shift && Mask) {
216145bb48eaSTom Stellard         uint32_t ShiftVal = Shift->getZExtValue();
216245bb48eaSTom Stellard         uint32_t MaskVal = Mask->getZExtValue();
216345bb48eaSTom Stellard 
216445bb48eaSTom Stellard         if (isMask_32(MaskVal)) {
216545bb48eaSTom Stellard           uint32_t WidthVal = countPopulation(MaskVal);
21660a3d755eSalex-t           ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
21670a3d755eSalex-t                                   WidthVal));
216895927c0fSJustin Bogner           return;
216945bb48eaSTom Stellard         }
217045bb48eaSTom Stellard       }
217145bb48eaSTom Stellard     }
217245bb48eaSTom Stellard     break;
217345bb48eaSTom Stellard   case ISD::SRL:
217445bb48eaSTom Stellard     if (N->getOperand(0).getOpcode() == ISD::AND) {
217545bb48eaSTom Stellard       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
217645bb48eaSTom Stellard       // Predicate: isMask(mask >> b)
217745bb48eaSTom Stellard       const SDValue &And = N->getOperand(0);
217845bb48eaSTom Stellard       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
217945bb48eaSTom Stellard       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
218045bb48eaSTom Stellard 
218145bb48eaSTom Stellard       if (Shift && Mask) {
218245bb48eaSTom Stellard         uint32_t ShiftVal = Shift->getZExtValue();
218345bb48eaSTom Stellard         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
218445bb48eaSTom Stellard 
218545bb48eaSTom Stellard         if (isMask_32(MaskVal)) {
218645bb48eaSTom Stellard           uint32_t WidthVal = countPopulation(MaskVal);
21870a3d755eSalex-t           ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
21880a3d755eSalex-t                       WidthVal));
218995927c0fSJustin Bogner           return;
219045bb48eaSTom Stellard         }
219145bb48eaSTom Stellard       }
219295927c0fSJustin Bogner     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
219395927c0fSJustin Bogner       SelectS_BFEFromShifts(N);
219495927c0fSJustin Bogner       return;
219595927c0fSJustin Bogner     }
219645bb48eaSTom Stellard     break;
219745bb48eaSTom Stellard   case ISD::SRA:
219895927c0fSJustin Bogner     if (N->getOperand(0).getOpcode() == ISD::SHL) {
219995927c0fSJustin Bogner       SelectS_BFEFromShifts(N);
220095927c0fSJustin Bogner       return;
220195927c0fSJustin Bogner     }
220245bb48eaSTom Stellard     break;
22037e8de01fSMatt Arsenault 
22047e8de01fSMatt Arsenault   case ISD::SIGN_EXTEND_INREG: {
22057e8de01fSMatt Arsenault     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
22067e8de01fSMatt Arsenault     SDValue Src = N->getOperand(0);
22077e8de01fSMatt Arsenault     if (Src.getOpcode() != ISD::SRL)
22087e8de01fSMatt Arsenault       break;
22097e8de01fSMatt Arsenault 
22107e8de01fSMatt Arsenault     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
22117e8de01fSMatt Arsenault     if (!Amt)
22127e8de01fSMatt Arsenault       break;
22137e8de01fSMatt Arsenault 
22147e8de01fSMatt Arsenault     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
22150a3d755eSalex-t     ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
221695927c0fSJustin Bogner                             Amt->getZExtValue(), Width));
221795927c0fSJustin Bogner     return;
22187e8de01fSMatt Arsenault   }
221945bb48eaSTom Stellard   }
222045bb48eaSTom Stellard 
222195927c0fSJustin Bogner   SelectCode(N);
222245bb48eaSTom Stellard }
222345bb48eaSTom Stellard 
isCBranchSCC(const SDNode * N) const22247b1dc2c9SMatt Arsenault bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
22257b1dc2c9SMatt Arsenault   assert(N->getOpcode() == ISD::BRCOND);
22267b1dc2c9SMatt Arsenault   if (!N->hasOneUse())
22277b1dc2c9SMatt Arsenault     return false;
22287b1dc2c9SMatt Arsenault 
22297b1dc2c9SMatt Arsenault   SDValue Cond = N->getOperand(1);
22307b1dc2c9SMatt Arsenault   if (Cond.getOpcode() == ISD::CopyToReg)
22317b1dc2c9SMatt Arsenault     Cond = Cond.getOperand(2);
22327b1dc2c9SMatt Arsenault 
22337b1dc2c9SMatt Arsenault   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
22347b1dc2c9SMatt Arsenault     return false;
22357b1dc2c9SMatt Arsenault 
22367b1dc2c9SMatt Arsenault   MVT VT = Cond.getOperand(0).getSimpleValueType();
22377b1dc2c9SMatt Arsenault   if (VT == MVT::i32)
22387b1dc2c9SMatt Arsenault     return true;
22397b1dc2c9SMatt Arsenault 
22407b1dc2c9SMatt Arsenault   if (VT == MVT::i64) {
22415bfbae5cSTom Stellard     auto ST = static_cast<const GCNSubtarget *>(Subtarget);
22427b1dc2c9SMatt Arsenault 
22437b1dc2c9SMatt Arsenault     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
22447b1dc2c9SMatt Arsenault     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
22457b1dc2c9SMatt Arsenault   }
22467b1dc2c9SMatt Arsenault 
22477b1dc2c9SMatt Arsenault   return false;
22487b1dc2c9SMatt Arsenault }
22497b1dc2c9SMatt Arsenault 
SelectBRCOND(SDNode * N)225095927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2251bc4497b1STom Stellard   SDValue Cond = N->getOperand(1);
2252bc4497b1STom Stellard 
2253327188aaSMatt Arsenault   if (Cond.isUndef()) {
2254327188aaSMatt Arsenault     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2255327188aaSMatt Arsenault                          N->getOperand(2), N->getOperand(0));
2256327188aaSMatt Arsenault     return;
2257327188aaSMatt Arsenault   }
2258327188aaSMatt Arsenault 
225952500216SStanislav Mekhanoshin   const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
226052500216SStanislav Mekhanoshin   const SIRegisterInfo *TRI = ST->getRegisterInfo();
226152500216SStanislav Mekhanoshin 
2262d674e0acSMatt Arsenault   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2263d674e0acSMatt Arsenault   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2264178050c3SMatt Arsenault   Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2265bc4497b1STom Stellard   SDLoc SL(N);
2266bc4497b1STom Stellard 
22676eaad1e5STim Renouf   if (!UseSCCBr) {
22686eaad1e5STim Renouf     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
22696eaad1e5STim Renouf     // analyzed what generates the vcc value, so we do not know whether vcc
22706eaad1e5STim Renouf     // bits for disabled lanes are 0.  Thus we need to mask out bits for
22716eaad1e5STim Renouf     // disabled lanes.
22726eaad1e5STim Renouf     //
22736eaad1e5STim Renouf     // For the case that we select S_CBRANCH_SCC1 and it gets
22746eaad1e5STim Renouf     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
22756eaad1e5STim Renouf     // SIInstrInfo::moveToVALU which inserts the S_AND).
22766eaad1e5STim Renouf     //
22776eaad1e5STim Renouf     // We could add an analysis of what generates the vcc value here and omit
22786eaad1e5STim Renouf     // the S_AND when is unnecessary. But it would be better to add a separate
22796eaad1e5STim Renouf     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
22806eaad1e5STim Renouf     // catches both cases.
228152500216SStanislav Mekhanoshin     Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
228252500216SStanislav Mekhanoshin                                                          : AMDGPU::S_AND_B64,
228352500216SStanislav Mekhanoshin                      SL, MVT::i1,
228452500216SStanislav Mekhanoshin                      CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
228552500216SStanislav Mekhanoshin                                                         : AMDGPU::EXEC,
228652500216SStanislav Mekhanoshin                                          MVT::i1),
22876eaad1e5STim Renouf                     Cond),
22886eaad1e5STim Renouf                    0);
22896eaad1e5STim Renouf   }
22906eaad1e5STim Renouf 
2291d674e0acSMatt Arsenault   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2292d674e0acSMatt Arsenault   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2293bc4497b1STom Stellard                        N->getOperand(2), // Basic Block
2294f530e8b3SMatt Arsenault                        VCC.getValue(0));
2295bc4497b1STom Stellard }
2296bc4497b1STom Stellard 
SelectFMAD_FMA(SDNode * N)22970084adc5SMatt Arsenault void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2298d7e2303dSMatt Arsenault   MVT VT = N->getSimpleValueType(0);
22990084adc5SMatt Arsenault   bool IsFMA = N->getOpcode() == ISD::FMA;
23000084adc5SMatt Arsenault   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
23010084adc5SMatt Arsenault                          !Subtarget->hasFmaMixInsts()) ||
23020084adc5SMatt Arsenault       ((IsFMA && Subtarget->hasMadMixInsts()) ||
23030084adc5SMatt Arsenault        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2304d7e2303dSMatt Arsenault     SelectCode(N);
2305d7e2303dSMatt Arsenault     return;
2306d7e2303dSMatt Arsenault   }
2307d7e2303dSMatt Arsenault 
2308d7e2303dSMatt Arsenault   SDValue Src0 = N->getOperand(0);
2309d7e2303dSMatt Arsenault   SDValue Src1 = N->getOperand(1);
2310d7e2303dSMatt Arsenault   SDValue Src2 = N->getOperand(2);
2311d7e2303dSMatt Arsenault   unsigned Src0Mods, Src1Mods, Src2Mods;
2312d7e2303dSMatt Arsenault 
23130084adc5SMatt Arsenault   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
23140084adc5SMatt Arsenault   // using the conversion from f16.
2315d7e2303dSMatt Arsenault   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2316d7e2303dSMatt Arsenault   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2317d7e2303dSMatt Arsenault   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2318d7e2303dSMatt Arsenault 
23191024b73eSMatt Arsenault   assert((IsFMA || !Mode.allFP32Denormals()) &&
2320d7e2303dSMatt Arsenault          "fmad selected with denormals enabled");
2321d7e2303dSMatt Arsenault   // TODO: We can select this with f32 denormals enabled if all the sources are
2322d7e2303dSMatt Arsenault   // converted from f16 (in which case fmad isn't legal).
2323d7e2303dSMatt Arsenault 
2324d7e2303dSMatt Arsenault   if (Sel0 || Sel1 || Sel2) {
2325d7e2303dSMatt Arsenault     // For dummy operands.
2326d7e2303dSMatt Arsenault     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2327d7e2303dSMatt Arsenault     SDValue Ops[] = {
2328d7e2303dSMatt Arsenault       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2329d7e2303dSMatt Arsenault       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2330d7e2303dSMatt Arsenault       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2331d7e2303dSMatt Arsenault       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2332d7e2303dSMatt Arsenault       Zero, Zero
2333d7e2303dSMatt Arsenault     };
2334d7e2303dSMatt Arsenault 
23350084adc5SMatt Arsenault     CurDAG->SelectNodeTo(N,
23360084adc5SMatt Arsenault                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
23370084adc5SMatt Arsenault                          MVT::f32, Ops);
2338d7e2303dSMatt Arsenault   } else {
2339d7e2303dSMatt Arsenault     SelectCode(N);
2340d7e2303dSMatt Arsenault   }
2341d7e2303dSMatt Arsenault }
2342d7e2303dSMatt Arsenault 
SelectDSAppendConsume(SDNode * N,unsigned IntrID)2343d3c84e67SMatt Arsenault void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2344cdd191d9SMatt Arsenault   // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2345cdd191d9SMatt Arsenault   // be copied to an SGPR with readfirstlane.
2346cdd191d9SMatt Arsenault   unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2347cdd191d9SMatt Arsenault     AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2348cdd191d9SMatt Arsenault 
2349cdd191d9SMatt Arsenault   SDValue Chain = N->getOperand(0);
2350cdd191d9SMatt Arsenault   SDValue Ptr = N->getOperand(2);
2351cdd191d9SMatt Arsenault   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
23529e5fa333SMatt Arsenault   MachineMemOperand *MMO = M->getMemOperand();
2353cdd191d9SMatt Arsenault   bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2354cdd191d9SMatt Arsenault 
2355cdd191d9SMatt Arsenault   SDValue Offset;
2356cdd191d9SMatt Arsenault   if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2357cdd191d9SMatt Arsenault     SDValue PtrBase = Ptr.getOperand(0);
2358cdd191d9SMatt Arsenault     SDValue PtrOffset = Ptr.getOperand(1);
2359cdd191d9SMatt Arsenault 
2360cdd191d9SMatt Arsenault     const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2361040c5027SJay Foad     if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2362cdd191d9SMatt Arsenault       N = glueCopyToM0(N, PtrBase);
2363cdd191d9SMatt Arsenault       Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2364cdd191d9SMatt Arsenault     }
2365cdd191d9SMatt Arsenault   }
2366cdd191d9SMatt Arsenault 
2367cdd191d9SMatt Arsenault   if (!Offset) {
2368cdd191d9SMatt Arsenault     N = glueCopyToM0(N, Ptr);
2369cdd191d9SMatt Arsenault     Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2370cdd191d9SMatt Arsenault   }
2371cdd191d9SMatt Arsenault 
2372cdd191d9SMatt Arsenault   SDValue Ops[] = {
2373cdd191d9SMatt Arsenault     Offset,
2374cdd191d9SMatt Arsenault     CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2375cdd191d9SMatt Arsenault     Chain,
2376cdd191d9SMatt Arsenault     N->getOperand(N->getNumOperands() - 1) // New glue
2377cdd191d9SMatt Arsenault   };
2378cdd191d9SMatt Arsenault 
23799e5fa333SMatt Arsenault   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
23809e5fa333SMatt Arsenault   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2381cdd191d9SMatt Arsenault }
2382cdd191d9SMatt Arsenault 
gwsIntrinToOpcode(unsigned IntrID)2383740322f1SMatt Arsenault static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2384740322f1SMatt Arsenault   switch (IntrID) {
2385740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_init:
2386740322f1SMatt Arsenault     return AMDGPU::DS_GWS_INIT;
2387740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_barrier:
2388740322f1SMatt Arsenault     return AMDGPU::DS_GWS_BARRIER;
2389740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_v:
2390740322f1SMatt Arsenault     return AMDGPU::DS_GWS_SEMA_V;
2391740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_br:
2392740322f1SMatt Arsenault     return AMDGPU::DS_GWS_SEMA_BR;
2393740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_p:
2394740322f1SMatt Arsenault     return AMDGPU::DS_GWS_SEMA_P;
2395740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_release_all:
2396740322f1SMatt Arsenault     return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2397740322f1SMatt Arsenault   default:
2398740322f1SMatt Arsenault     llvm_unreachable("not a gws intrinsic");
2399740322f1SMatt Arsenault   }
2400740322f1SMatt Arsenault }
2401740322f1SMatt Arsenault 
SelectDS_GWS(SDNode * N,unsigned IntrID)24024d55d024SMatt Arsenault void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2403740322f1SMatt Arsenault   if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2404740322f1SMatt Arsenault       !Subtarget->hasGWSSemaReleaseAll()) {
2405740322f1SMatt Arsenault     // Let this error.
2406740322f1SMatt Arsenault     SelectCode(N);
2407740322f1SMatt Arsenault     return;
2408740322f1SMatt Arsenault   }
2409740322f1SMatt Arsenault 
2410740322f1SMatt Arsenault   // Chain, intrinsic ID, vsrc, offset
2411740322f1SMatt Arsenault   const bool HasVSrc = N->getNumOperands() == 4;
2412740322f1SMatt Arsenault   assert(HasVSrc || N->getNumOperands() == 3);
2413740322f1SMatt Arsenault 
24144d55d024SMatt Arsenault   SDLoc SL(N);
2415740322f1SMatt Arsenault   SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
24164d55d024SMatt Arsenault   int ImmOffset = 0;
24174d55d024SMatt Arsenault   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
24184d55d024SMatt Arsenault   MachineMemOperand *MMO = M->getMemOperand();
24194d55d024SMatt Arsenault 
24204d55d024SMatt Arsenault   // Don't worry if the offset ends up in a VGPR. Only one lane will have
24214d55d024SMatt Arsenault   // effect, so SIFixSGPRCopies will validly insert readfirstlane.
24224d55d024SMatt Arsenault 
24234d55d024SMatt Arsenault   // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
24244d55d024SMatt Arsenault   // offset field) % 64. Some versions of the programming guide omit the m0
24254d55d024SMatt Arsenault   // part, or claim it's from offset 0.
24264d55d024SMatt Arsenault   if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
242748c0df5dSMatt Arsenault     // If we have a constant offset, try to use the 0 in m0 as the base.
242848c0df5dSMatt Arsenault     // TODO: Look into changing the default m0 initialization value. If the
242948c0df5dSMatt Arsenault     // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
243048c0df5dSMatt Arsenault     // the immediate offset.
243148c0df5dSMatt Arsenault     glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
243248c0df5dSMatt Arsenault     ImmOffset = ConstOffset->getZExtValue();
24334d55d024SMatt Arsenault   } else {
24344d55d024SMatt Arsenault     if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
24354d55d024SMatt Arsenault       ImmOffset = BaseOffset.getConstantOperandVal(1);
24364d55d024SMatt Arsenault       BaseOffset = BaseOffset.getOperand(0);
24374d55d024SMatt Arsenault     }
24384d55d024SMatt Arsenault 
24394d55d024SMatt Arsenault     // Prefer to do the shift in an SGPR since it should be possible to use m0
24404d55d024SMatt Arsenault     // as the result directly. If it's already an SGPR, it will be eliminated
24414d55d024SMatt Arsenault     // later.
24424d55d024SMatt Arsenault     SDNode *SGPROffset
24434d55d024SMatt Arsenault       = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
24444d55d024SMatt Arsenault                                BaseOffset);
24454d55d024SMatt Arsenault     // Shift to offset in m0
24464d55d024SMatt Arsenault     SDNode *M0Base
24474d55d024SMatt Arsenault       = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
24484d55d024SMatt Arsenault                                SDValue(SGPROffset, 0),
24494d55d024SMatt Arsenault                                CurDAG->getTargetConstant(16, SL, MVT::i32));
24504d55d024SMatt Arsenault     glueCopyToM0(N, SDValue(M0Base, 0));
24514d55d024SMatt Arsenault   }
24524d55d024SMatt Arsenault 
2453740322f1SMatt Arsenault   SDValue Chain = N->getOperand(0);
24544d55d024SMatt Arsenault   SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
24554d55d024SMatt Arsenault 
2456740322f1SMatt Arsenault   const unsigned Opc = gwsIntrinToOpcode(IntrID);
2457740322f1SMatt Arsenault   SmallVector<SDValue, 5> Ops;
2458740322f1SMatt Arsenault   if (HasVSrc)
2459bb582ebdSMatt Arsenault     Ops.push_back(N->getOperand(2));
2460740322f1SMatt Arsenault   Ops.push_back(OffsetField);
2461740322f1SMatt Arsenault   Ops.push_back(Chain);
24624d55d024SMatt Arsenault 
24634d55d024SMatt Arsenault   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
24644d55d024SMatt Arsenault   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
24654d55d024SMatt Arsenault }
24664d55d024SMatt Arsenault 
SelectInterpP1F16(SDNode * N)246768b102b9SMatt Arsenault void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
246868b102b9SMatt Arsenault   if (Subtarget->getLDSBankCount() != 16) {
246968b102b9SMatt Arsenault     // This is a single instruction with a pattern.
247068b102b9SMatt Arsenault     SelectCode(N);
247168b102b9SMatt Arsenault     return;
247268b102b9SMatt Arsenault   }
247368b102b9SMatt Arsenault 
247468b102b9SMatt Arsenault   SDLoc DL(N);
247568b102b9SMatt Arsenault 
247668b102b9SMatt Arsenault   // This requires 2 instructions. It is possible to write a pattern to support
247768b102b9SMatt Arsenault   // this, but the generated isel emitter doesn't correctly deal with multiple
247868b102b9SMatt Arsenault   // output instructions using the same physical register input. The copy to m0
247968b102b9SMatt Arsenault   // is incorrectly placed before the second instruction.
248068b102b9SMatt Arsenault   //
248168b102b9SMatt Arsenault   // TODO: Match source modifiers.
248268b102b9SMatt Arsenault   //
248368b102b9SMatt Arsenault   // def : Pat <
248468b102b9SMatt Arsenault   //   (int_amdgcn_interp_p1_f16
248568b102b9SMatt Arsenault   //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
248668b102b9SMatt Arsenault   //                             (i32 timm:$attrchan), (i32 timm:$attr),
248768b102b9SMatt Arsenault   //                             (i1 timm:$high), M0),
248868b102b9SMatt Arsenault   //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
248968b102b9SMatt Arsenault   //       timm:$attrchan, 0,
249068b102b9SMatt Arsenault   //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
249168b102b9SMatt Arsenault   //   let Predicates = [has16BankLDS];
249268b102b9SMatt Arsenault   // }
249368b102b9SMatt Arsenault 
249468b102b9SMatt Arsenault   // 16 bank LDS
249568b102b9SMatt Arsenault   SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
249668b102b9SMatt Arsenault                                       N->getOperand(5), SDValue());
249768b102b9SMatt Arsenault 
249868b102b9SMatt Arsenault   SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
249968b102b9SMatt Arsenault 
250068b102b9SMatt Arsenault   SDNode *InterpMov =
250168b102b9SMatt Arsenault     CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
250268b102b9SMatt Arsenault         CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
250368b102b9SMatt Arsenault         N->getOperand(3),  // Attr
250468b102b9SMatt Arsenault         N->getOperand(2),  // Attrchan
250568b102b9SMatt Arsenault         ToM0.getValue(1) // In glue
250668b102b9SMatt Arsenault   });
250768b102b9SMatt Arsenault 
250868b102b9SMatt Arsenault   SDNode *InterpP1LV =
250968b102b9SMatt Arsenault     CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
251068b102b9SMatt Arsenault         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
251168b102b9SMatt Arsenault         N->getOperand(1), // Src0
251268b102b9SMatt Arsenault         N->getOperand(3), // Attr
251368b102b9SMatt Arsenault         N->getOperand(2), // Attrchan
251468b102b9SMatt Arsenault         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
251568b102b9SMatt Arsenault         SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
251668b102b9SMatt Arsenault         N->getOperand(4), // high
251768b102b9SMatt Arsenault         CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
251868b102b9SMatt Arsenault         CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
251968b102b9SMatt Arsenault         SDValue(InterpMov, 1)
252068b102b9SMatt Arsenault   });
252168b102b9SMatt Arsenault 
252268b102b9SMatt Arsenault   CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
252368b102b9SMatt Arsenault }
252468b102b9SMatt Arsenault 
SelectINTRINSIC_W_CHAIN(SDNode * N)2525d3c84e67SMatt Arsenault void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2526d3c84e67SMatt Arsenault   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2527d3c84e67SMatt Arsenault   switch (IntrID) {
2528d3c84e67SMatt Arsenault   case Intrinsic::amdgcn_ds_append:
2529d3c84e67SMatt Arsenault   case Intrinsic::amdgcn_ds_consume: {
2530d3c84e67SMatt Arsenault     if (N->getValueType(0) != MVT::i32)
2531d3c84e67SMatt Arsenault       break;
2532d3c84e67SMatt Arsenault     SelectDSAppendConsume(N, IntrID);
2533d3c84e67SMatt Arsenault     return;
2534d3c84e67SMatt Arsenault   }
25354d55d024SMatt Arsenault   }
25364d55d024SMatt Arsenault 
25374d55d024SMatt Arsenault   SelectCode(N);
25384d55d024SMatt Arsenault }
25394d55d024SMatt Arsenault 
SelectINTRINSIC_WO_CHAIN(SDNode * N)254000e89b42SCarl Ritson void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
254100e89b42SCarl Ritson   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
254200e89b42SCarl Ritson   unsigned Opcode;
254300e89b42SCarl Ritson   switch (IntrID) {
25440b283570SCarl Ritson   case Intrinsic::amdgcn_wqm:
25450b283570SCarl Ritson     Opcode = AMDGPU::WQM;
25460b283570SCarl Ritson     break;
254700e89b42SCarl Ritson   case Intrinsic::amdgcn_softwqm:
254800e89b42SCarl Ritson     Opcode = AMDGPU::SOFT_WQM;
254900e89b42SCarl Ritson     break;
25500b283570SCarl Ritson   case Intrinsic::amdgcn_wwm:
2551c3ce7baeSPiotr Sobczak   case Intrinsic::amdgcn_strict_wwm:
2552c3ce7baeSPiotr Sobczak     Opcode = AMDGPU::STRICT_WWM;
25530b283570SCarl Ritson     break;
25544672bac1SPiotr Sobczak   case Intrinsic::amdgcn_strict_wqm:
25554672bac1SPiotr Sobczak     Opcode = AMDGPU::STRICT_WQM;
25564672bac1SPiotr Sobczak     break;
255768b102b9SMatt Arsenault   case Intrinsic::amdgcn_interp_p1_f16:
255868b102b9SMatt Arsenault     SelectInterpP1F16(N);
255968b102b9SMatt Arsenault     return;
256000e89b42SCarl Ritson   default:
256100e89b42SCarl Ritson     SelectCode(N);
256200e89b42SCarl Ritson     return;
256300e89b42SCarl Ritson   }
256400e89b42SCarl Ritson 
256500e89b42SCarl Ritson   SDValue Src = N->getOperand(1);
256600e89b42SCarl Ritson   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
256700e89b42SCarl Ritson }
256800e89b42SCarl Ritson 
SelectINTRINSIC_VOID(SDNode * N)25694d55d024SMatt Arsenault void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
25704d55d024SMatt Arsenault   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
25714d55d024SMatt Arsenault   switch (IntrID) {
25724d55d024SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_init:
25734d55d024SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_barrier:
2574740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_v:
2575740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_br:
2576740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_p:
2577740322f1SMatt Arsenault   case Intrinsic::amdgcn_ds_gws_sema_release_all:
25784d55d024SMatt Arsenault     SelectDS_GWS(N, IntrID);
25794d55d024SMatt Arsenault     return;
2580d3c84e67SMatt Arsenault   default:
2581d3c84e67SMatt Arsenault     break;
2582d3c84e67SMatt Arsenault   }
2583d3c84e67SMatt Arsenault 
2584d3c84e67SMatt Arsenault   SelectCode(N);
2585d3c84e67SMatt Arsenault }
2586d3c84e67SMatt Arsenault 
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool AllowAbs) const2587d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
25885b91a6a8SJay Foad                                             unsigned &Mods,
25895b91a6a8SJay Foad                                             bool AllowAbs) const {
2590d7e2303dSMatt Arsenault   Mods = 0;
259145bb48eaSTom Stellard   Src = In;
259245bb48eaSTom Stellard 
259345bb48eaSTom Stellard   if (Src.getOpcode() == ISD::FNEG) {
259445bb48eaSTom Stellard     Mods |= SISrcMods::NEG;
259545bb48eaSTom Stellard     Src = Src.getOperand(0);
259645bb48eaSTom Stellard   }
259745bb48eaSTom Stellard 
25985b91a6a8SJay Foad   if (AllowAbs && Src.getOpcode() == ISD::FABS) {
259945bb48eaSTom Stellard     Mods |= SISrcMods::ABS;
260045bb48eaSTom Stellard     Src = Src.getOperand(0);
260145bb48eaSTom Stellard   }
260245bb48eaSTom Stellard 
2603d7e2303dSMatt Arsenault   return true;
2604d7e2303dSMatt Arsenault }
2605d7e2303dSMatt Arsenault 
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const2606d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2607d7e2303dSMatt Arsenault                                         SDValue &SrcMods) const {
2608d7e2303dSMatt Arsenault   unsigned Mods;
2609d7e2303dSMatt Arsenault   if (SelectVOP3ModsImpl(In, Src, Mods)) {
261045bb48eaSTom Stellard     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
261145bb48eaSTom Stellard     return true;
261245bb48eaSTom Stellard   }
261345bb48eaSTom Stellard 
2614d7e2303dSMatt Arsenault   return false;
2615d7e2303dSMatt Arsenault }
2616d7e2303dSMatt Arsenault 
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const26175b91a6a8SJay Foad bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
26185b91a6a8SJay Foad                                          SDValue &SrcMods) const {
26195b91a6a8SJay Foad   unsigned Mods;
26205b91a6a8SJay Foad   if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
26215b91a6a8SJay Foad     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
26225b91a6a8SJay Foad     return true;
26235b91a6a8SJay Foad   }
26245b91a6a8SJay Foad 
26255b91a6a8SJay Foad   return false;
26265b91a6a8SJay Foad }
26275b91a6a8SJay Foad 
SelectVOP3Mods_NNaN(SDValue In,SDValue & Src,SDValue & SrcMods) const2628f84e5d9aSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2629f84e5d9aSMatt Arsenault                                              SDValue &SrcMods) const {
2630f84e5d9aSMatt Arsenault   SelectVOP3Mods(In, Src, SrcMods);
2631f84e5d9aSMatt Arsenault   return isNoNanSrc(Src);
2632f84e5d9aSMatt Arsenault }
2633f84e5d9aSMatt Arsenault 
SelectVOP3NoMods(SDValue In,SDValue & Src) const2634df58e825SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2635df58e825SMatt Arsenault   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2636df58e825SMatt Arsenault     return false;
2637df58e825SMatt Arsenault 
2638df58e825SMatt Arsenault   Src = In;
2639df58e825SMatt Arsenault   return true;
2640db5a11f6STom Stellard }
2641db5a11f6STom Stellard 
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const264220d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
264320d20156SJoe Nash                                                SDValue &SrcMods,
264420d20156SJoe Nash                                                bool OpSel) const {
264520d20156SJoe Nash   unsigned Mods;
264620d20156SJoe Nash   if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
264720d20156SJoe Nash     if (OpSel)
264820d20156SJoe Nash       Mods |= SISrcMods::OP_SEL_0;
264920d20156SJoe Nash     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
265020d20156SJoe Nash     return true;
265120d20156SJoe Nash   }
265220d20156SJoe Nash 
265320d20156SJoe Nash   return false;
265420d20156SJoe Nash }
265520d20156SJoe Nash 
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const265620d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
265720d20156SJoe Nash                                            SDValue &SrcMods) const {
265820d20156SJoe Nash   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
265920d20156SJoe Nash }
266020d20156SJoe Nash 
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const266120d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
266220d20156SJoe Nash                                              SDValue &SrcMods) const {
266320d20156SJoe Nash   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
266420d20156SJoe Nash }
266520d20156SJoe Nash 
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const266645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
266745bb48eaSTom Stellard                                          SDValue &SrcMods, SDValue &Clamp,
266845bb48eaSTom Stellard                                          SDValue &Omod) const {
266945bb48eaSTom Stellard   SDLoc DL(In);
2670df58e825SMatt Arsenault   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2671df58e825SMatt Arsenault   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
267245bb48eaSTom Stellard 
267345bb48eaSTom Stellard   return SelectVOP3Mods(In, Src, SrcMods);
267445bb48eaSTom Stellard }
267545bb48eaSTom Stellard 
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const26765b91a6a8SJay Foad bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
26775b91a6a8SJay Foad                                           SDValue &SrcMods, SDValue &Clamp,
26785b91a6a8SJay Foad                                           SDValue &Omod) const {
26795b91a6a8SJay Foad   SDLoc DL(In);
26805b91a6a8SJay Foad   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
26815b91a6a8SJay Foad   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
26825b91a6a8SJay Foad 
26835b91a6a8SJay Foad   return SelectVOP3BMods(In, Src, SrcMods);
26845b91a6a8SJay Foad }
26855b91a6a8SJay Foad 
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const2686c512d448SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2687c512d448SDmitry Preobrazhensky                                          SDValue &Clamp, SDValue &Omod) const {
2688c512d448SDmitry Preobrazhensky   Src = In;
2689c512d448SDmitry Preobrazhensky 
2690c512d448SDmitry Preobrazhensky   SDLoc DL(In);
2691df58e825SMatt Arsenault   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2692df58e825SMatt Arsenault   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2693c512d448SDmitry Preobrazhensky 
2694c512d448SDmitry Preobrazhensky   return true;
2695c512d448SDmitry Preobrazhensky }
2696c512d448SDmitry Preobrazhensky 
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const2697eb522e68SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2698c4500de2SStanislav Mekhanoshin                                          SDValue &SrcMods, bool IsDOT) const {
2699eb522e68SMatt Arsenault   unsigned Mods = 0;
2700eb522e68SMatt Arsenault   Src = In;
2701eb522e68SMatt Arsenault 
2702eb522e68SMatt Arsenault   if (Src.getOpcode() == ISD::FNEG) {
2703786eeea2SMatt Arsenault     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2704eb522e68SMatt Arsenault     Src = Src.getOperand(0);
2705eb522e68SMatt Arsenault   }
2706eb522e68SMatt Arsenault 
2707c4500de2SStanislav Mekhanoshin   if (Src.getOpcode() == ISD::BUILD_VECTOR &&
2708c4500de2SStanislav Mekhanoshin       (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2709786eeea2SMatt Arsenault     unsigned VecMods = Mods;
2710786eeea2SMatt Arsenault 
271198f2946aSMatt Arsenault     SDValue Lo = stripBitcast(Src.getOperand(0));
271298f2946aSMatt Arsenault     SDValue Hi = stripBitcast(Src.getOperand(1));
2713786eeea2SMatt Arsenault 
2714786eeea2SMatt Arsenault     if (Lo.getOpcode() == ISD::FNEG) {
271598f2946aSMatt Arsenault       Lo = stripBitcast(Lo.getOperand(0));
2716786eeea2SMatt Arsenault       Mods ^= SISrcMods::NEG;
2717786eeea2SMatt Arsenault     }
2718786eeea2SMatt Arsenault 
2719786eeea2SMatt Arsenault     if (Hi.getOpcode() == ISD::FNEG) {
272098f2946aSMatt Arsenault       Hi = stripBitcast(Hi.getOperand(0));
2721786eeea2SMatt Arsenault       Mods ^= SISrcMods::NEG_HI;
2722786eeea2SMatt Arsenault     }
2723786eeea2SMatt Arsenault 
272498f2946aSMatt Arsenault     if (isExtractHiElt(Lo, Lo))
272598f2946aSMatt Arsenault       Mods |= SISrcMods::OP_SEL_0;
272698f2946aSMatt Arsenault 
272798f2946aSMatt Arsenault     if (isExtractHiElt(Hi, Hi))
272898f2946aSMatt Arsenault       Mods |= SISrcMods::OP_SEL_1;
272998f2946aSMatt Arsenault 
2730a8d9d507SStanislav Mekhanoshin     unsigned VecSize = Src.getValueSizeInBits();
273198f2946aSMatt Arsenault     Lo = stripExtractLoElt(Lo);
273298f2946aSMatt Arsenault     Hi = stripExtractLoElt(Hi);
273398f2946aSMatt Arsenault 
2734a8d9d507SStanislav Mekhanoshin     if (Lo.getValueSizeInBits() > VecSize) {
2735a8d9d507SStanislav Mekhanoshin       Lo = CurDAG->getTargetExtractSubreg(
2736a8d9d507SStanislav Mekhanoshin         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2737a8d9d507SStanislav Mekhanoshin         MVT::getIntegerVT(VecSize), Lo);
2738a8d9d507SStanislav Mekhanoshin     }
2739a8d9d507SStanislav Mekhanoshin 
2740a8d9d507SStanislav Mekhanoshin     if (Hi.getValueSizeInBits() > VecSize) {
2741a8d9d507SStanislav Mekhanoshin       Hi = CurDAG->getTargetExtractSubreg(
2742a8d9d507SStanislav Mekhanoshin         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2743a8d9d507SStanislav Mekhanoshin         MVT::getIntegerVT(VecSize), Hi);
2744a8d9d507SStanislav Mekhanoshin     }
2745a8d9d507SStanislav Mekhanoshin 
2746a8d9d507SStanislav Mekhanoshin     assert(Lo.getValueSizeInBits() <= VecSize &&
2747a8d9d507SStanislav Mekhanoshin            Hi.getValueSizeInBits() <= VecSize);
2748a8d9d507SStanislav Mekhanoshin 
2749786eeea2SMatt Arsenault     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2750786eeea2SMatt Arsenault       // Really a scalar input. Just select from the low half of the register to
2751786eeea2SMatt Arsenault       // avoid packing.
2752786eeea2SMatt Arsenault 
2753a8d9d507SStanislav Mekhanoshin       if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2754786eeea2SMatt Arsenault         Src = Lo;
2755a8d9d507SStanislav Mekhanoshin       } else {
2756a8d9d507SStanislav Mekhanoshin         assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2757a8d9d507SStanislav Mekhanoshin 
2758a8d9d507SStanislav Mekhanoshin         SDLoc SL(In);
2759a8d9d507SStanislav Mekhanoshin         SDValue Undef = SDValue(
2760a8d9d507SStanislav Mekhanoshin           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2761a8d9d507SStanislav Mekhanoshin                                  Lo.getValueType()), 0);
2762a8d9d507SStanislav Mekhanoshin         auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2763a8d9d507SStanislav Mekhanoshin                                     : AMDGPU::SReg_64RegClassID;
2764a8d9d507SStanislav Mekhanoshin         const SDValue Ops[] = {
2765a8d9d507SStanislav Mekhanoshin           CurDAG->getTargetConstant(RC, SL, MVT::i32),
2766a8d9d507SStanislav Mekhanoshin           Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2767a8d9d507SStanislav Mekhanoshin           Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2768a8d9d507SStanislav Mekhanoshin 
2769a8d9d507SStanislav Mekhanoshin         Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2770a8d9d507SStanislav Mekhanoshin                                              Src.getValueType(), Ops), 0);
2771a8d9d507SStanislav Mekhanoshin       }
2772786eeea2SMatt Arsenault       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2773786eeea2SMatt Arsenault       return true;
2774786eeea2SMatt Arsenault     }
2775786eeea2SMatt Arsenault 
2776a8d9d507SStanislav Mekhanoshin     if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2777a8d9d507SStanislav Mekhanoshin       uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2778a8d9d507SStanislav Mekhanoshin                       .bitcastToAPInt().getZExtValue();
2779a8d9d507SStanislav Mekhanoshin       if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2780a8d9d507SStanislav Mekhanoshin         Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
2781a8d9d507SStanislav Mekhanoshin         SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2782a8d9d507SStanislav Mekhanoshin         return true;
2783a8d9d507SStanislav Mekhanoshin       }
2784a8d9d507SStanislav Mekhanoshin     }
2785a8d9d507SStanislav Mekhanoshin 
2786786eeea2SMatt Arsenault     Mods = VecMods;
2787786eeea2SMatt Arsenault   }
2788786eeea2SMatt Arsenault 
2789eb522e68SMatt Arsenault   // Packed instructions do not have abs modifiers.
2790eb522e68SMatt Arsenault   Mods |= SISrcMods::OP_SEL_1;
2791eb522e68SMatt Arsenault 
2792eb522e68SMatt Arsenault   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2793eb522e68SMatt Arsenault   return true;
2794eb522e68SMatt Arsenault }
2795eb522e68SMatt Arsenault 
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const2796c4500de2SStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2797c4500de2SStanislav Mekhanoshin                                             SDValue &SrcMods) const {
2798c4500de2SStanislav Mekhanoshin   return SelectVOP3PMods(In, Src, SrcMods, true);
2799c4500de2SStanislav Mekhanoshin }
2800c4500de2SStanislav Mekhanoshin 
SelectDotIUVOP3PMods(SDValue In,SDValue & Src) const28012d43de13SJoe Nash bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
28022d43de13SJoe Nash   const ConstantSDNode *C = cast<ConstantSDNode>(In);
28032d43de13SJoe Nash   // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
28042d43de13SJoe Nash   // 1 promotes packed values to signed, 0 treats them as unsigned.
28052d43de13SJoe Nash   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
28062d43de13SJoe Nash 
28072d43de13SJoe Nash   unsigned Mods = SISrcMods::OP_SEL_1;
28082d43de13SJoe Nash   unsigned SrcSign = C->getAPIntValue().getZExtValue();
28092d43de13SJoe Nash   if (SrcSign == 1)
28102d43de13SJoe Nash     Mods ^= SISrcMods::NEG;
28112d43de13SJoe Nash 
28122d43de13SJoe Nash   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28132d43de13SJoe Nash   return true;
28142d43de13SJoe Nash }
28152d43de13SJoe Nash 
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const28164874838aSPiotr Sobczak bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
28174874838aSPiotr Sobczak                                                   SDValue &Src) const {
28184874838aSPiotr Sobczak   const ConstantSDNode *C = cast<ConstantSDNode>(In);
28194874838aSPiotr Sobczak   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
28204874838aSPiotr Sobczak 
28214874838aSPiotr Sobczak   unsigned Mods = SISrcMods::OP_SEL_1;
28224874838aSPiotr Sobczak   unsigned SrcVal = C->getAPIntValue().getZExtValue();
28234874838aSPiotr Sobczak   if (SrcVal == 1)
28244874838aSPiotr Sobczak     Mods |= SISrcMods::OP_SEL_0;
28254874838aSPiotr Sobczak 
28264874838aSPiotr Sobczak   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28274874838aSPiotr Sobczak   return true;
28284874838aSPiotr Sobczak }
28294874838aSPiotr Sobczak 
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const2830abf28394SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2831abf28394SDmitry Preobrazhensky                                          SDValue &SrcMods) const {
2832abf28394SDmitry Preobrazhensky   Src = In;
2833abf28394SDmitry Preobrazhensky   // FIXME: Handle op_sel
2834abf28394SDmitry Preobrazhensky   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2835abf28394SDmitry Preobrazhensky   return true;
2836abf28394SDmitry Preobrazhensky }
2837abf28394SDmitry Preobrazhensky 
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2838abf28394SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2839abf28394SDmitry Preobrazhensky                                              SDValue &SrcMods) const {
2840abf28394SDmitry Preobrazhensky   // FIXME: Handle op_sel
2841abf28394SDmitry Preobrazhensky   return SelectVOP3Mods(In, Src, SrcMods);
2842abf28394SDmitry Preobrazhensky }
2843abf28394SDmitry Preobrazhensky 
2844d7e2303dSMatt Arsenault // The return value is not whether the match is possible (which it always is),
2845d7e2303dSMatt Arsenault // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const2846d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2847d7e2303dSMatt Arsenault                                                    unsigned &Mods) const {
2848d7e2303dSMatt Arsenault   Mods = 0;
2849d7e2303dSMatt Arsenault   SelectVOP3ModsImpl(In, Src, Mods);
2850d7e2303dSMatt Arsenault 
2851d7e2303dSMatt Arsenault   if (Src.getOpcode() == ISD::FP_EXTEND) {
2852d7e2303dSMatt Arsenault     Src = Src.getOperand(0);
2853d7e2303dSMatt Arsenault     assert(Src.getValueType() == MVT::f16);
2854d7e2303dSMatt Arsenault     Src = stripBitcast(Src);
2855d7e2303dSMatt Arsenault 
2856550c66d1SMatt Arsenault     // Be careful about folding modifiers if we already have an abs. fneg is
2857550c66d1SMatt Arsenault     // applied last, so we don't want to apply an earlier fneg.
2858550c66d1SMatt Arsenault     if ((Mods & SISrcMods::ABS) == 0) {
2859550c66d1SMatt Arsenault       unsigned ModsTmp;
2860550c66d1SMatt Arsenault       SelectVOP3ModsImpl(Src, Src, ModsTmp);
2861550c66d1SMatt Arsenault 
2862550c66d1SMatt Arsenault       if ((ModsTmp & SISrcMods::NEG) != 0)
2863550c66d1SMatt Arsenault         Mods ^= SISrcMods::NEG;
2864550c66d1SMatt Arsenault 
2865550c66d1SMatt Arsenault       if ((ModsTmp & SISrcMods::ABS) != 0)
2866550c66d1SMatt Arsenault         Mods |= SISrcMods::ABS;
2867550c66d1SMatt Arsenault     }
2868550c66d1SMatt Arsenault 
2869d7e2303dSMatt Arsenault     // op_sel/op_sel_hi decide the source type and source.
2870d7e2303dSMatt Arsenault     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2871d7e2303dSMatt Arsenault     // If the sources's op_sel is set, it picks the high half of the source
2872d7e2303dSMatt Arsenault     // register.
2873d7e2303dSMatt Arsenault 
2874d7e2303dSMatt Arsenault     Mods |= SISrcMods::OP_SEL_1;
2875550c66d1SMatt Arsenault     if (isExtractHiElt(Src, Src)) {
2876d7e2303dSMatt Arsenault       Mods |= SISrcMods::OP_SEL_0;
2877d7e2303dSMatt Arsenault 
2878550c66d1SMatt Arsenault       // TODO: Should we try to look for neg/abs here?
2879550c66d1SMatt Arsenault     }
2880550c66d1SMatt Arsenault 
2881d7e2303dSMatt Arsenault     return true;
2882d7e2303dSMatt Arsenault   }
2883d7e2303dSMatt Arsenault 
2884d7e2303dSMatt Arsenault   return false;
2885d7e2303dSMatt Arsenault }
2886d7e2303dSMatt Arsenault 
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const288776935122SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
288876935122SMatt Arsenault                                                SDValue &SrcMods) const {
288976935122SMatt Arsenault   unsigned Mods = 0;
289076935122SMatt Arsenault   SelectVOP3PMadMixModsImpl(In, Src, Mods);
289176935122SMatt Arsenault   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
289276935122SMatt Arsenault   return true;
289376935122SMatt Arsenault }
289476935122SMatt Arsenault 
getHi16Elt(SDValue In) const2895e8c03a25SMatt Arsenault SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2896e8c03a25SMatt Arsenault   if (In.isUndef())
2897e8c03a25SMatt Arsenault     return CurDAG->getUNDEF(MVT::i32);
2898e8c03a25SMatt Arsenault 
2899e8c03a25SMatt Arsenault   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2900e8c03a25SMatt Arsenault     SDLoc SL(In);
2901e8c03a25SMatt Arsenault     return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2902e8c03a25SMatt Arsenault   }
2903e8c03a25SMatt Arsenault 
2904e8c03a25SMatt Arsenault   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2905e8c03a25SMatt Arsenault     SDLoc SL(In);
2906e8c03a25SMatt Arsenault     return CurDAG->getConstant(
2907e8c03a25SMatt Arsenault       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2908e8c03a25SMatt Arsenault   }
2909e8c03a25SMatt Arsenault 
2910e8c03a25SMatt Arsenault   SDValue Src;
2911e8c03a25SMatt Arsenault   if (isExtractHiElt(In, Src))
2912e8c03a25SMatt Arsenault     return Src;
2913e8c03a25SMatt Arsenault 
2914e8c03a25SMatt Arsenault   return SDValue();
2915e8c03a25SMatt Arsenault }
2916e8c03a25SMatt Arsenault 
isVGPRImm(const SDNode * N) const2917db7ee766SAlexander Timofeev bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2918e4c2e9b0SMatt Arsenault   assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2919e4c2e9b0SMatt Arsenault 
2920db7ee766SAlexander Timofeev   const SIRegisterInfo *SIRI =
2921db7ee766SAlexander Timofeev     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2922db7ee766SAlexander Timofeev   const SIInstrInfo * SII =
2923db7ee766SAlexander Timofeev     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2924db7ee766SAlexander Timofeev 
2925db7ee766SAlexander Timofeev   unsigned Limit = 0;
2926db7ee766SAlexander Timofeev   bool AllUsesAcceptSReg = true;
2927db7ee766SAlexander Timofeev   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2928db7ee766SAlexander Timofeev     Limit < 10 && U != E; ++U, ++Limit) {
2929db7ee766SAlexander Timofeev     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2930db7ee766SAlexander Timofeev 
2931db7ee766SAlexander Timofeev     // If the register class is unknown, it could be an unknown
2932db7ee766SAlexander Timofeev     // register class that needs to be an SGPR, e.g. an inline asm
2933db7ee766SAlexander Timofeev     // constraint
2934db7ee766SAlexander Timofeev     if (!RC || SIRI->isSGPRClass(RC))
2935db7ee766SAlexander Timofeev       return false;
2936db7ee766SAlexander Timofeev 
2937db7ee766SAlexander Timofeev     if (RC != &AMDGPU::VS_32RegClass) {
2938db7ee766SAlexander Timofeev       AllUsesAcceptSReg = false;
2939db7ee766SAlexander Timofeev       SDNode * User = *U;
2940db7ee766SAlexander Timofeev       if (User->isMachineOpcode()) {
2941db7ee766SAlexander Timofeev         unsigned Opc = User->getMachineOpcode();
2942db7ee766SAlexander Timofeev         MCInstrDesc Desc = SII->get(Opc);
2943db7ee766SAlexander Timofeev         if (Desc.isCommutable()) {
2944db7ee766SAlexander Timofeev           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2945db7ee766SAlexander Timofeev           unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2946db7ee766SAlexander Timofeev           if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2947db7ee766SAlexander Timofeev             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2948db7ee766SAlexander Timofeev             const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2949db7ee766SAlexander Timofeev             if (CommutedRC == &AMDGPU::VS_32RegClass)
2950db7ee766SAlexander Timofeev               AllUsesAcceptSReg = true;
2951db7ee766SAlexander Timofeev           }
2952db7ee766SAlexander Timofeev         }
2953db7ee766SAlexander Timofeev       }
29546527b2a4SSebastian Neubauer       // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2955db7ee766SAlexander Timofeev       // commuting current user. This means have at least one use
2956db7ee766SAlexander Timofeev       // that strictly require VGPR. Thus, we will not attempt to commute
2957db7ee766SAlexander Timofeev       // other user instructions.
2958db7ee766SAlexander Timofeev       if (!AllUsesAcceptSReg)
2959db7ee766SAlexander Timofeev         break;
2960db7ee766SAlexander Timofeev     }
2961db7ee766SAlexander Timofeev   }
2962db7ee766SAlexander Timofeev   return !AllUsesAcceptSReg && (Limit < 10);
2963db7ee766SAlexander Timofeev }
2964db7ee766SAlexander Timofeev 
isUniformLoad(const SDNode * N) const29654d302f69SAlexander Timofeev bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
29664d302f69SAlexander Timofeev   auto Ld = cast<LoadSDNode>(N);
29674d302f69SAlexander Timofeev 
296807881861SGuillaume Chatelet   return Ld->getAlign() >= Align(4) &&
296907881861SGuillaume Chatelet          (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
297007881861SGuillaume Chatelet             Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
297107881861SGuillaume Chatelet            !N->isDivergent()) ||
297207881861SGuillaume Chatelet           (Subtarget->getScalarizeGlobalBehavior() &&
29734d302f69SAlexander Timofeev            Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
297407881861SGuillaume Chatelet            Ld->isSimple() && !N->isDivergent() &&
297507881861SGuillaume Chatelet            static_cast<const SITargetLowering *>(getTargetLowering())
297607881861SGuillaume Chatelet                ->isMemOpHasNoClobberedMemOperand(N)));
29774d302f69SAlexander Timofeev }
2978db7ee766SAlexander Timofeev 
PostprocessISelDAG()297945bb48eaSTom Stellard void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
298045bb48eaSTom Stellard   const AMDGPUTargetLowering& Lowering =
298145bb48eaSTom Stellard     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
298245bb48eaSTom Stellard   bool IsModified = false;
298345bb48eaSTom Stellard   do {
298445bb48eaSTom Stellard     IsModified = false;
298568f05052SMatt Arsenault 
298645bb48eaSTom Stellard     // Go over all selected nodes and try to fold them a bit more
298768f05052SMatt Arsenault     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
298868f05052SMatt Arsenault     while (Position != CurDAG->allnodes_end()) {
298968f05052SMatt Arsenault       SDNode *Node = &*Position++;
299068f05052SMatt Arsenault       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
299145bb48eaSTom Stellard       if (!MachineNode)
299245bb48eaSTom Stellard         continue;
299345bb48eaSTom Stellard 
299445bb48eaSTom Stellard       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
299568f05052SMatt Arsenault       if (ResNode != Node) {
299668f05052SMatt Arsenault         if (ResNode)
299768f05052SMatt Arsenault           ReplaceUses(Node, ResNode);
299845bb48eaSTom Stellard         IsModified = true;
299945bb48eaSTom Stellard       }
300045bb48eaSTom Stellard     }
300145bb48eaSTom Stellard     CurDAG->RemoveDeadNodes();
300245bb48eaSTom Stellard   } while (IsModified);
300345bb48eaSTom Stellard }
3004