17836f895SMatt Arsenault //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
245bb48eaSTom Stellard //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
645bb48eaSTom Stellard //
745bb48eaSTom Stellard //==-----------------------------------------------------------------------===//
845bb48eaSTom Stellard //
945bb48eaSTom Stellard /// \file
105f8f34e4SAdrian Prantl /// Defines an instruction selector for the AMDGPU target.
1145bb48eaSTom Stellard //
1245bb48eaSTom Stellard //===----------------------------------------------------------------------===//
13592d0681SMatt Arsenault
1447d6274dSDaniil Fukalov #include "AMDGPUISelDAGToDAG.h"
152bc2f33bSEugene Zelenko #include "AMDGPU.h"
1620d20156SJoe Nash #include "AMDGPUSubtarget.h"
17cc85223fSMatt Arsenault #include "AMDGPUTargetMachine.h"
1820d20156SJoe Nash #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1948958d02SDaniil Fukalov #include "MCTargetDesc/R600MCTargetDesc.h"
2047d6274dSDaniil Fukalov #include "R600RegisterInfo.h"
2145bb48eaSTom Stellard #include "SIMachineFunctionInfo.h"
2235617ed4SNicolai Haehnle #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
23f97de007SJan Vesely #include "llvm/Analysis/ValueTracking.h"
2445bb48eaSTom Stellard #include "llvm/CodeGen/FunctionLoweringInfo.h"
2545bb48eaSTom Stellard #include "llvm/CodeGen/SelectionDAG.h"
2645bb48eaSTom Stellard #include "llvm/CodeGen/SelectionDAGISel.h"
272bc2f33bSEugene Zelenko #include "llvm/CodeGen/SelectionDAGNodes.h"
286a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
2905da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
306a87e9b0Sdfukalov
312ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
326a87e9b0Sdfukalov #include "llvm/Analysis/LoopInfo.h"
332ce560f0SAlexander Timofeev #include "llvm/IR/Dominators.h"
342ce560f0SAlexander Timofeev #endif
3545bb48eaSTom Stellard
36*c17450a0SFangrui Song #define DEBUG_TYPE "amdgpu-isel"
37e8c03a25SMatt Arsenault
3845bb48eaSTom Stellard using namespace llvm;
3945bb48eaSTom Stellard
4045bb48eaSTom Stellard //===----------------------------------------------------------------------===//
4145bb48eaSTom Stellard // Instruction Selector Implementation
4245bb48eaSTom Stellard //===----------------------------------------------------------------------===//
4345bb48eaSTom Stellard
4445bb48eaSTom Stellard namespace {
45bc4497b1STom Stellard
stripBitcast(SDValue Val)46e8c03a25SMatt Arsenault static SDValue stripBitcast(SDValue Val) {
47e8c03a25SMatt Arsenault return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
48e8c03a25SMatt Arsenault }
49e8c03a25SMatt Arsenault
50e8c03a25SMatt Arsenault // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)51e8c03a25SMatt Arsenault static bool isExtractHiElt(SDValue In, SDValue &Out) {
52e8c03a25SMatt Arsenault In = stripBitcast(In);
53a8d9d507SStanislav Mekhanoshin
54a8d9d507SStanislav Mekhanoshin if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
55a8d9d507SStanislav Mekhanoshin if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
56a8d9d507SStanislav Mekhanoshin if (!Idx->isOne())
57a8d9d507SStanislav Mekhanoshin return false;
58a8d9d507SStanislav Mekhanoshin Out = In.getOperand(0);
59a8d9d507SStanislav Mekhanoshin return true;
60a8d9d507SStanislav Mekhanoshin }
61a8d9d507SStanislav Mekhanoshin }
62a8d9d507SStanislav Mekhanoshin
63e8c03a25SMatt Arsenault if (In.getOpcode() != ISD::TRUNCATE)
64e8c03a25SMatt Arsenault return false;
65e8c03a25SMatt Arsenault
66e8c03a25SMatt Arsenault SDValue Srl = In.getOperand(0);
67e8c03a25SMatt Arsenault if (Srl.getOpcode() == ISD::SRL) {
68e8c03a25SMatt Arsenault if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
69e8c03a25SMatt Arsenault if (ShiftAmt->getZExtValue() == 16) {
70e8c03a25SMatt Arsenault Out = stripBitcast(Srl.getOperand(0));
71e8c03a25SMatt Arsenault return true;
72e8c03a25SMatt Arsenault }
73e8c03a25SMatt Arsenault }
74e8c03a25SMatt Arsenault }
75e8c03a25SMatt Arsenault
76e8c03a25SMatt Arsenault return false;
77e8c03a25SMatt Arsenault }
78e8c03a25SMatt Arsenault
79e8c03a25SMatt Arsenault // Look through operations that obscure just looking at the low 16-bits of the
80e8c03a25SMatt Arsenault // same register.
stripExtractLoElt(SDValue In)81e8c03a25SMatt Arsenault static SDValue stripExtractLoElt(SDValue In) {
82a8d9d507SStanislav Mekhanoshin if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
83a8d9d507SStanislav Mekhanoshin if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
849af8f1b1SCraig Topper if (Idx->isZero() && In.getValueSizeInBits() <= 32)
85a8d9d507SStanislav Mekhanoshin return In.getOperand(0);
86a8d9d507SStanislav Mekhanoshin }
87a8d9d507SStanislav Mekhanoshin }
88a8d9d507SStanislav Mekhanoshin
89e8c03a25SMatt Arsenault if (In.getOpcode() == ISD::TRUNCATE) {
90e8c03a25SMatt Arsenault SDValue Src = In.getOperand(0);
91e8c03a25SMatt Arsenault if (Src.getValueType().getSizeInBits() == 32)
92e8c03a25SMatt Arsenault return stripBitcast(Src);
93e8c03a25SMatt Arsenault }
94e8c03a25SMatt Arsenault
95e8c03a25SMatt Arsenault return In;
96e8c03a25SMatt Arsenault }
97e8c03a25SMatt Arsenault
9845bb48eaSTom Stellard } // end anonymous namespace
9945bb48eaSTom Stellard
1003d76d360SFangrui Song INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
1017016f134SMatt Arsenault "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)1027016f134SMatt Arsenault INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
1031c538423SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10435617ed4SNicolai Haehnle INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
1052ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
1062ce560f0SAlexander Timofeev INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1072ce560f0SAlexander Timofeev INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1082ce560f0SAlexander Timofeev #endif
1093d76d360SFangrui Song INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
1107016f134SMatt Arsenault "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
1117016f134SMatt Arsenault
1125f8f34e4SAdrian Prantl /// This pass converts a legalized DAG into a AMDGPU-specific
11345bb48eaSTom Stellard // DAG, ready for instruction scheduling.
1147016f134SMatt Arsenault FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
11560a83737SKonstantin Zhuravlyov CodeGenOpt::Level OptLevel) {
11660a83737SKonstantin Zhuravlyov return new AMDGPUDAGToDAGISel(TM, OptLevel);
11745bb48eaSTom Stellard }
11845bb48eaSTom Stellard
AMDGPUDAGToDAGISel(TargetMachine * TM,CodeGenOpt::Level OptLevel)11947d6274dSDaniil Fukalov AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
12047d6274dSDaniil Fukalov TargetMachine *TM /*= nullptr*/,
12147d6274dSDaniil Fukalov CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
12247d6274dSDaniil Fukalov : SelectionDAGISel(*TM, OptLevel) {
12347d6274dSDaniil Fukalov EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
12420287697STom Stellard }
12520287697STom Stellard
runOnMachineFunction(MachineFunction & MF)12645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
1272ce560f0SAlexander Timofeev #ifdef EXPENSIVE_CHECKS
1282ce560f0SAlexander Timofeev DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1292ce560f0SAlexander Timofeev LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1302ce560f0SAlexander Timofeev for (auto &L : LI->getLoopsInPreorder()) {
1312ce560f0SAlexander Timofeev assert(L->isLCSSAForm(DT));
1322ce560f0SAlexander Timofeev }
1332ce560f0SAlexander Timofeev #endif
1345bfbae5cSTom Stellard Subtarget = &MF.getSubtarget<GCNSubtarget>();
1355660bb6bSMatt Arsenault Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
13645bb48eaSTom Stellard return SelectionDAGISel::runOnMachineFunction(MF);
13745bb48eaSTom Stellard }
13845bb48eaSTom Stellard
fp16SrcZerosHighBits(unsigned Opc) const1399ad8a1f6SMatt Arsenault bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
1409ad8a1f6SMatt Arsenault // XXX - only need to list legal operations.
1419ad8a1f6SMatt Arsenault switch (Opc) {
1429ad8a1f6SMatt Arsenault case ISD::FADD:
1439ad8a1f6SMatt Arsenault case ISD::FSUB:
1449ad8a1f6SMatt Arsenault case ISD::FMUL:
1459ad8a1f6SMatt Arsenault case ISD::FDIV:
1469ad8a1f6SMatt Arsenault case ISD::FREM:
1479ad8a1f6SMatt Arsenault case ISD::FCANONICALIZE:
1489ad8a1f6SMatt Arsenault case ISD::UINT_TO_FP:
1499ad8a1f6SMatt Arsenault case ISD::SINT_TO_FP:
1509ad8a1f6SMatt Arsenault case ISD::FABS:
1519ad8a1f6SMatt Arsenault // Fabs is lowered to a bit operation, but it's an and which will clear the
1529ad8a1f6SMatt Arsenault // high bits anyway.
1539ad8a1f6SMatt Arsenault case ISD::FSQRT:
1549ad8a1f6SMatt Arsenault case ISD::FSIN:
1559ad8a1f6SMatt Arsenault case ISD::FCOS:
1569ad8a1f6SMatt Arsenault case ISD::FPOWI:
1579ad8a1f6SMatt Arsenault case ISD::FPOW:
1589ad8a1f6SMatt Arsenault case ISD::FLOG:
1599ad8a1f6SMatt Arsenault case ISD::FLOG2:
1609ad8a1f6SMatt Arsenault case ISD::FLOG10:
1619ad8a1f6SMatt Arsenault case ISD::FEXP:
1629ad8a1f6SMatt Arsenault case ISD::FEXP2:
1639ad8a1f6SMatt Arsenault case ISD::FCEIL:
1649ad8a1f6SMatt Arsenault case ISD::FTRUNC:
1659ad8a1f6SMatt Arsenault case ISD::FRINT:
1669ad8a1f6SMatt Arsenault case ISD::FNEARBYINT:
1679ad8a1f6SMatt Arsenault case ISD::FROUND:
1689ad8a1f6SMatt Arsenault case ISD::FFLOOR:
1699ad8a1f6SMatt Arsenault case ISD::FMINNUM:
1709ad8a1f6SMatt Arsenault case ISD::FMAXNUM:
1719ad8a1f6SMatt Arsenault case AMDGPUISD::FRACT:
1729ad8a1f6SMatt Arsenault case AMDGPUISD::CLAMP:
1739ad8a1f6SMatt Arsenault case AMDGPUISD::COS_HW:
1749ad8a1f6SMatt Arsenault case AMDGPUISD::SIN_HW:
1759ad8a1f6SMatt Arsenault case AMDGPUISD::FMIN3:
1769ad8a1f6SMatt Arsenault case AMDGPUISD::FMAX3:
1779ad8a1f6SMatt Arsenault case AMDGPUISD::FMED3:
1789ad8a1f6SMatt Arsenault case AMDGPUISD::FMAD_FTZ:
1799ad8a1f6SMatt Arsenault case AMDGPUISD::RCP:
1809ad8a1f6SMatt Arsenault case AMDGPUISD::RSQ:
1819ad8a1f6SMatt Arsenault case AMDGPUISD::RCP_IFLAG:
1829ad8a1f6SMatt Arsenault case AMDGPUISD::LDEXP:
1839ad8a1f6SMatt Arsenault // On gfx10, all 16-bit instructions preserve the high bits.
1849ad8a1f6SMatt Arsenault return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
1859ad8a1f6SMatt Arsenault case ISD::FP_ROUND:
1869ad8a1f6SMatt Arsenault // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
1879ad8a1f6SMatt Arsenault // high bits on gfx9.
1889ad8a1f6SMatt Arsenault // TODO: If we had the source node we could see if the source was fma/mad
1899ad8a1f6SMatt Arsenault return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
1909ad8a1f6SMatt Arsenault case ISD::FMA:
1919ad8a1f6SMatt Arsenault case ISD::FMAD:
1929ad8a1f6SMatt Arsenault case AMDGPUISD::DIV_FIXUP:
1939ad8a1f6SMatt Arsenault return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
1949ad8a1f6SMatt Arsenault default:
1959ad8a1f6SMatt Arsenault // fcopysign, select and others may be lowered to 32-bit bit operations
1969ad8a1f6SMatt Arsenault // which don't zero the high bits.
1979ad8a1f6SMatt Arsenault return false;
1989ad8a1f6SMatt Arsenault }
1999ad8a1f6SMatt Arsenault }
2009ad8a1f6SMatt Arsenault
getAnalysisUsage(AnalysisUsage & AU) const20147d6274dSDaniil Fukalov void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
20247d6274dSDaniil Fukalov AU.addRequired<AMDGPUArgumentUsageInfo>();
20347d6274dSDaniil Fukalov AU.addRequired<LegacyDivergenceAnalysis>();
20447d6274dSDaniil Fukalov #ifdef EXPENSIVE_CHECKS
20547d6274dSDaniil Fukalov AU.addRequired<DominatorTreeWrapperPass>();
20647d6274dSDaniil Fukalov AU.addRequired<LoopInfoWrapperPass>();
20747d6274dSDaniil Fukalov #endif
20847d6274dSDaniil Fukalov SelectionDAGISel::getAnalysisUsage(AU);
20947d6274dSDaniil Fukalov }
21047d6274dSDaniil Fukalov
matchLoadD16FromBuildVector(SDNode * N) const211e8c03a25SMatt Arsenault bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
212e8c03a25SMatt Arsenault assert(Subtarget->d16PreservesUnusedBits());
213e8c03a25SMatt Arsenault MVT VT = N->getValueType(0).getSimpleVT();
214e8c03a25SMatt Arsenault if (VT != MVT::v2i16 && VT != MVT::v2f16)
215e8c03a25SMatt Arsenault return false;
216e8c03a25SMatt Arsenault
217e8c03a25SMatt Arsenault SDValue Lo = N->getOperand(0);
218e8c03a25SMatt Arsenault SDValue Hi = N->getOperand(1);
219e8c03a25SMatt Arsenault
220e8c03a25SMatt Arsenault LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
221e8c03a25SMatt Arsenault
222e8c03a25SMatt Arsenault // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
223e8c03a25SMatt Arsenault // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
224e8c03a25SMatt Arsenault // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
225e8c03a25SMatt Arsenault
226e8c03a25SMatt Arsenault // Need to check for possible indirect dependencies on the other half of the
227e8c03a25SMatt Arsenault // vector to avoid introducing a cycle.
228e8c03a25SMatt Arsenault if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
229e8c03a25SMatt Arsenault SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
230e8c03a25SMatt Arsenault
231e8c03a25SMatt Arsenault SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
232e8c03a25SMatt Arsenault SDValue Ops[] = {
233e8c03a25SMatt Arsenault LdHi->getChain(), LdHi->getBasePtr(), TiedIn
234e8c03a25SMatt Arsenault };
235e8c03a25SMatt Arsenault
236e8c03a25SMatt Arsenault unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
237e8c03a25SMatt Arsenault if (LdHi->getMemoryVT() == MVT::i8) {
238e8c03a25SMatt Arsenault LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
239e8c03a25SMatt Arsenault AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
240e8c03a25SMatt Arsenault } else {
241e8c03a25SMatt Arsenault assert(LdHi->getMemoryVT() == MVT::i16);
242e8c03a25SMatt Arsenault }
243e8c03a25SMatt Arsenault
244e8c03a25SMatt Arsenault SDValue NewLoadHi =
245e8c03a25SMatt Arsenault CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
246e8c03a25SMatt Arsenault Ops, LdHi->getMemoryVT(),
247e8c03a25SMatt Arsenault LdHi->getMemOperand());
248e8c03a25SMatt Arsenault
249e8c03a25SMatt Arsenault CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
250e8c03a25SMatt Arsenault CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
251e8c03a25SMatt Arsenault return true;
252e8c03a25SMatt Arsenault }
253e8c03a25SMatt Arsenault
254e8c03a25SMatt Arsenault // build_vector (load ptr), hi -> load_d16_lo ptr, hi
255e8c03a25SMatt Arsenault // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
256e8c03a25SMatt Arsenault // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
257e8c03a25SMatt Arsenault LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
258e8c03a25SMatt Arsenault if (LdLo && Lo.hasOneUse()) {
259e8c03a25SMatt Arsenault SDValue TiedIn = getHi16Elt(Hi);
260e8c03a25SMatt Arsenault if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
261e8c03a25SMatt Arsenault return false;
262e8c03a25SMatt Arsenault
263e8c03a25SMatt Arsenault SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
264e8c03a25SMatt Arsenault unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
265e8c03a25SMatt Arsenault if (LdLo->getMemoryVT() == MVT::i8) {
266e8c03a25SMatt Arsenault LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
267e8c03a25SMatt Arsenault AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
268e8c03a25SMatt Arsenault } else {
269e8c03a25SMatt Arsenault assert(LdLo->getMemoryVT() == MVT::i16);
270e8c03a25SMatt Arsenault }
271e8c03a25SMatt Arsenault
272e8c03a25SMatt Arsenault TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
273e8c03a25SMatt Arsenault
274e8c03a25SMatt Arsenault SDValue Ops[] = {
275e8c03a25SMatt Arsenault LdLo->getChain(), LdLo->getBasePtr(), TiedIn
276e8c03a25SMatt Arsenault };
277e8c03a25SMatt Arsenault
278e8c03a25SMatt Arsenault SDValue NewLoadLo =
279e8c03a25SMatt Arsenault CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
280e8c03a25SMatt Arsenault Ops, LdLo->getMemoryVT(),
281e8c03a25SMatt Arsenault LdLo->getMemOperand());
282e8c03a25SMatt Arsenault
283e8c03a25SMatt Arsenault CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
284e8c03a25SMatt Arsenault CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
285e8c03a25SMatt Arsenault return true;
286e8c03a25SMatt Arsenault }
287e8c03a25SMatt Arsenault
288e8c03a25SMatt Arsenault return false;
289e8c03a25SMatt Arsenault }
290e8c03a25SMatt Arsenault
PreprocessISelDAG()291e8c03a25SMatt Arsenault void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
292e8c03a25SMatt Arsenault if (!Subtarget->d16PreservesUnusedBits())
293e8c03a25SMatt Arsenault return;
294e8c03a25SMatt Arsenault
295e8c03a25SMatt Arsenault SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
296e8c03a25SMatt Arsenault
297e8c03a25SMatt Arsenault bool MadeChange = false;
298e8c03a25SMatt Arsenault while (Position != CurDAG->allnodes_begin()) {
299e8c03a25SMatt Arsenault SDNode *N = &*--Position;
300e8c03a25SMatt Arsenault if (N->use_empty())
301e8c03a25SMatt Arsenault continue;
302e8c03a25SMatt Arsenault
303e8c03a25SMatt Arsenault switch (N->getOpcode()) {
304e8c03a25SMatt Arsenault case ISD::BUILD_VECTOR:
305e8c03a25SMatt Arsenault MadeChange |= matchLoadD16FromBuildVector(N);
306e8c03a25SMatt Arsenault break;
307e8c03a25SMatt Arsenault default:
308e8c03a25SMatt Arsenault break;
309e8c03a25SMatt Arsenault }
310e8c03a25SMatt Arsenault }
311e8c03a25SMatt Arsenault
312e8c03a25SMatt Arsenault if (MadeChange) {
313e8c03a25SMatt Arsenault CurDAG->RemoveDeadNodes();
314e8c03a25SMatt Arsenault LLVM_DEBUG(dbgs() << "After PreProcess:\n";
315e8c03a25SMatt Arsenault CurDAG->dump(););
316e8c03a25SMatt Arsenault }
317e8c03a25SMatt Arsenault }
318e8c03a25SMatt Arsenault
isNoNanSrc(SDValue N) const319f84e5d9aSMatt Arsenault bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
320f84e5d9aSMatt Arsenault if (TM.Options.NoNaNsFPMath)
321f84e5d9aSMatt Arsenault return true;
322f84e5d9aSMatt Arsenault
323f84e5d9aSMatt Arsenault // TODO: Move into isKnownNeverNaN
324714ceefaSJonas Paulsson if (N->getFlags().hasNoNaNs())
325714ceefaSJonas Paulsson return true;
326f84e5d9aSMatt Arsenault
327f84e5d9aSMatt Arsenault return CurDAG->isKnownNeverNaN(N);
328f84e5d9aSMatt Arsenault }
329f84e5d9aSMatt Arsenault
isInlineImmediate(const SDNode * N,bool Negated) const330e24b34e9SMatt Arsenault bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
331e24b34e9SMatt Arsenault bool Negated) const {
332b7f87c0eSMatt Arsenault if (N->isUndef())
333b7f87c0eSMatt Arsenault return true;
334fe267759SMatt Arsenault
335e24b34e9SMatt Arsenault const SIInstrInfo *TII = Subtarget->getInstrInfo();
336e24b34e9SMatt Arsenault if (Negated) {
337e24b34e9SMatt Arsenault if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
338e24b34e9SMatt Arsenault return TII->isInlineConstant(-C->getAPIntValue());
339e24b34e9SMatt Arsenault
340e24b34e9SMatt Arsenault if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
341e24b34e9SMatt Arsenault return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
342e24b34e9SMatt Arsenault
343e24b34e9SMatt Arsenault } else {
344fe267759SMatt Arsenault if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
345fe267759SMatt Arsenault return TII->isInlineConstant(C->getAPIntValue());
346fe267759SMatt Arsenault
347fe267759SMatt Arsenault if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
348fe267759SMatt Arsenault return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
349e24b34e9SMatt Arsenault }
350fe267759SMatt Arsenault
351fe267759SMatt Arsenault return false;
35245bb48eaSTom Stellard }
35345bb48eaSTom Stellard
3545f8f34e4SAdrian Prantl /// Determine the register class for \p OpNo
35545bb48eaSTom Stellard /// \returns The register class of the virtual register that will be used for
35645bb48eaSTom Stellard /// the given operand number \OpNo or NULL if the register class cannot be
35745bb48eaSTom Stellard /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const35845bb48eaSTom Stellard const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
35945bb48eaSTom Stellard unsigned OpNo) const {
360c507cdb4SMatt Arsenault if (!N->isMachineOpcode()) {
361c507cdb4SMatt Arsenault if (N->getOpcode() == ISD::CopyToReg) {
36234978602SJay Foad Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
36334978602SJay Foad if (Reg.isVirtual()) {
364c507cdb4SMatt Arsenault MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
365c507cdb4SMatt Arsenault return MRI.getRegClass(Reg);
366c507cdb4SMatt Arsenault }
367c507cdb4SMatt Arsenault
368c507cdb4SMatt Arsenault const SIRegisterInfo *TRI
3695bfbae5cSTom Stellard = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
370c507cdb4SMatt Arsenault return TRI->getPhysRegClass(Reg);
371c507cdb4SMatt Arsenault }
372c507cdb4SMatt Arsenault
37345bb48eaSTom Stellard return nullptr;
374c507cdb4SMatt Arsenault }
37545bb48eaSTom Stellard
37645bb48eaSTom Stellard switch (N->getMachineOpcode()) {
37745bb48eaSTom Stellard default: {
37845bb48eaSTom Stellard const MCInstrDesc &Desc =
37945bb48eaSTom Stellard Subtarget->getInstrInfo()->get(N->getMachineOpcode());
38045bb48eaSTom Stellard unsigned OpIdx = Desc.getNumDefs() + OpNo;
38145bb48eaSTom Stellard if (OpIdx >= Desc.getNumOperands())
38245bb48eaSTom Stellard return nullptr;
38345bb48eaSTom Stellard int RegClass = Desc.OpInfo[OpIdx].RegClass;
38445bb48eaSTom Stellard if (RegClass == -1)
38545bb48eaSTom Stellard return nullptr;
38645bb48eaSTom Stellard
38745bb48eaSTom Stellard return Subtarget->getRegisterInfo()->getRegClass(RegClass);
38845bb48eaSTom Stellard }
38945bb48eaSTom Stellard case AMDGPU::REG_SEQUENCE: {
39045bb48eaSTom Stellard unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
39145bb48eaSTom Stellard const TargetRegisterClass *SuperRC =
39245bb48eaSTom Stellard Subtarget->getRegisterInfo()->getRegClass(RCID);
39345bb48eaSTom Stellard
39445bb48eaSTom Stellard SDValue SubRegOp = N->getOperand(OpNo + 1);
39545bb48eaSTom Stellard unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
39645bb48eaSTom Stellard return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
39745bb48eaSTom Stellard SubRegIdx);
39845bb48eaSTom Stellard }
39945bb48eaSTom Stellard }
40045bb48eaSTom Stellard }
40145bb48eaSTom Stellard
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const402b5234b64SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
403b5234b64SMatt Arsenault SDValue Glue) const {
404b5234b64SMatt Arsenault SmallVector <SDValue, 8> Ops;
405b5234b64SMatt Arsenault Ops.push_back(NewChain); // Replace the chain.
406b5234b64SMatt Arsenault for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
407b5234b64SMatt Arsenault Ops.push_back(N->getOperand(i));
408b5234b64SMatt Arsenault
409b5234b64SMatt Arsenault Ops.push_back(Glue);
410b5234b64SMatt Arsenault return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
411b5234b64SMatt Arsenault }
412b5234b64SMatt Arsenault
glueCopyToM0(SDNode * N,SDValue Val) const413cdd191d9SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
41445bb48eaSTom Stellard const SITargetLowering& Lowering =
41545bb48eaSTom Stellard *static_cast<const SITargetLowering*>(getTargetLowering());
41645bb48eaSTom Stellard
4175a86dbcfSMatt Arsenault assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
4185a86dbcfSMatt Arsenault
419b5234b64SMatt Arsenault SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
420b5234b64SMatt Arsenault return glueCopyToOp(N, M0, M0.getValue(1));
42145bb48eaSTom Stellard }
42245bb48eaSTom Stellard
glueCopyToM0LDSInit(SDNode * N) const423cdd191d9SMatt Arsenault SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
4244dc3b2bfSNicolai Haehnle unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
4254dc3b2bfSNicolai Haehnle if (AS == AMDGPUAS::LOCAL_ADDRESS) {
4264dc3b2bfSNicolai Haehnle if (Subtarget->ldsRequiresM0Init())
427cdd191d9SMatt Arsenault return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
4284dc3b2bfSNicolai Haehnle } else if (AS == AMDGPUAS::REGION_ADDRESS) {
4294dc3b2bfSNicolai Haehnle MachineFunction &MF = CurDAG->getMachineFunction();
4304dc3b2bfSNicolai Haehnle unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
4314dc3b2bfSNicolai Haehnle return
4324dc3b2bfSNicolai Haehnle glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
4334dc3b2bfSNicolai Haehnle }
4344dc3b2bfSNicolai Haehnle return N;
435cdd191d9SMatt Arsenault }
436cdd191d9SMatt Arsenault
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const437f1c7b92aSTim Renouf MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
438f1c7b92aSTim Renouf EVT VT) const {
439f1c7b92aSTim Renouf SDNode *Lo = CurDAG->getMachineNode(
440f1c7b92aSTim Renouf AMDGPU::S_MOV_B32, DL, MVT::i32,
44106eed422SMatt Arsenault CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
442f1c7b92aSTim Renouf SDNode *Hi =
443f1c7b92aSTim Renouf CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
44406eed422SMatt Arsenault CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
445f1c7b92aSTim Renouf const SDValue Ops[] = {
446f1c7b92aSTim Renouf CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
447f1c7b92aSTim Renouf SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
448f1c7b92aSTim Renouf SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
449f1c7b92aSTim Renouf
450f1c7b92aSTim Renouf return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
451f1c7b92aSTim Renouf }
452f1c7b92aSTim Renouf
SelectBuildVector(SDNode * N,unsigned RegClassID)45320287697STom Stellard void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
45445bb48eaSTom Stellard EVT VT = N->getValueType(0);
45545bb48eaSTom Stellard unsigned NumVectorElts = VT.getVectorNumElements();
45645bb48eaSTom Stellard EVT EltVT = VT.getVectorElementType();
45745bb48eaSTom Stellard SDLoc DL(N);
45845bb48eaSTom Stellard SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
45945bb48eaSTom Stellard
46045bb48eaSTom Stellard if (NumVectorElts == 1) {
46195927c0fSJustin Bogner CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
46295927c0fSJustin Bogner RegClass);
46395927c0fSJustin Bogner return;
46445bb48eaSTom Stellard }
46545bb48eaSTom Stellard
466e67cc380SStanislav Mekhanoshin assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
46745bb48eaSTom Stellard "supported yet");
468e67cc380SStanislav Mekhanoshin // 32 = Max Num Vector Elements
46945bb48eaSTom Stellard // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
47045bb48eaSTom Stellard // 1 = Vector Register Class
471e67cc380SStanislav Mekhanoshin SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
47245bb48eaSTom Stellard
473ed3527c6SStanislav Mekhanoshin bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
474ed3527c6SStanislav Mekhanoshin Triple::amdgcn;
47545bb48eaSTom Stellard RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
47645bb48eaSTom Stellard bool IsRegSeq = true;
47745bb48eaSTom Stellard unsigned NOps = N->getNumOperands();
47845bb48eaSTom Stellard for (unsigned i = 0; i < NOps; i++) {
47945bb48eaSTom Stellard // XXX: Why is this here?
48045bb48eaSTom Stellard if (isa<RegisterSDNode>(N->getOperand(i))) {
48145bb48eaSTom Stellard IsRegSeq = false;
48245bb48eaSTom Stellard break;
48345bb48eaSTom Stellard }
484ed3527c6SStanislav Mekhanoshin unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
485ed3527c6SStanislav Mekhanoshin : R600RegisterInfo::getSubRegFromChannel(i);
48645bb48eaSTom Stellard RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
487ede0e407SSimon Pilgrim RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
48845bb48eaSTom Stellard }
48945bb48eaSTom Stellard if (NOps != NumVectorElts) {
49045bb48eaSTom Stellard // Fill in the missing undef elements if this was a scalar_to_vector.
49103aa3aeeSTom Stellard assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
49245bb48eaSTom Stellard MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
49345bb48eaSTom Stellard DL, EltVT);
49445bb48eaSTom Stellard for (unsigned i = NOps; i < NumVectorElts; ++i) {
495ed3527c6SStanislav Mekhanoshin unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
496ed3527c6SStanislav Mekhanoshin : R600RegisterInfo::getSubRegFromChannel(i);
49745bb48eaSTom Stellard RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
49845bb48eaSTom Stellard RegSeqArgs[1 + (2 * i) + 1] =
499ede0e407SSimon Pilgrim CurDAG->getTargetConstant(Sub, DL, MVT::i32);
50045bb48eaSTom Stellard }
50145bb48eaSTom Stellard }
50245bb48eaSTom Stellard
50345bb48eaSTom Stellard if (!IsRegSeq)
50420287697STom Stellard SelectCode(N);
50595927c0fSJustin Bogner CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
50620287697STom Stellard }
50720287697STom Stellard
Select(SDNode * N)50820287697STom Stellard void AMDGPUDAGToDAGISel::Select(SDNode *N) {
50920287697STom Stellard unsigned int Opc = N->getOpcode();
51020287697STom Stellard if (N->isMachineOpcode()) {
51120287697STom Stellard N->setNodeId(-1);
51220287697STom Stellard return; // Already selected.
51320287697STom Stellard }
51420287697STom Stellard
5152bd166adSMatt Arsenault // isa<MemSDNode> almost works but is slightly too permissive for some DS
5162bd166adSMatt Arsenault // intrinsics.
5172bd166adSMatt Arsenault if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
518d5fca554SDaniil Fukalov (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
519a5840c3cSMatt Arsenault Opc == ISD::ATOMIC_LOAD_FADD ||
520d5fca554SDaniil Fukalov Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
521cdd45d5fSMatt Arsenault Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
522cdd191d9SMatt Arsenault N = glueCopyToM0LDSInit(N);
5232bd166adSMatt Arsenault SelectCode(N);
5242bd166adSMatt Arsenault return;
5252bd166adSMatt Arsenault }
52620287697STom Stellard
52720287697STom Stellard switch (Opc) {
52884445dd1SMatt Arsenault default:
52984445dd1SMatt Arsenault break;
53020287697STom Stellard // We are selecting i64 ADD here instead of custom lower it during
53120287697STom Stellard // DAG legalization, so we can fold some i64 ADDs used for address
53220287697STom Stellard // calculation into the LOAD and STORE instructions.
53320287697STom Stellard case ISD::ADDC:
53420287697STom Stellard case ISD::ADDE:
53520287697STom Stellard case ISD::SUBC:
53620287697STom Stellard case ISD::SUBE: {
53720287697STom Stellard if (N->getValueType(0) != MVT::i64)
53820287697STom Stellard break;
53920287697STom Stellard
54020287697STom Stellard SelectADD_SUB_I64(N);
54120287697STom Stellard return;
54220287697STom Stellard }
5438f3da70eSStanislav Mekhanoshin case ISD::ADDCARRY:
5448f3da70eSStanislav Mekhanoshin case ISD::SUBCARRY:
5458f3da70eSStanislav Mekhanoshin if (N->getValueType(0) != MVT::i32)
5468f3da70eSStanislav Mekhanoshin break;
5478f3da70eSStanislav Mekhanoshin
5488f3da70eSStanislav Mekhanoshin SelectAddcSubb(N);
5498f3da70eSStanislav Mekhanoshin return;
55020287697STom Stellard case ISD::UADDO:
55120287697STom Stellard case ISD::USUBO: {
55220287697STom Stellard SelectUADDO_USUBO(N);
55320287697STom Stellard return;
55420287697STom Stellard }
55520287697STom Stellard case AMDGPUISD::FMUL_W_CHAIN: {
55620287697STom Stellard SelectFMUL_W_CHAIN(N);
55720287697STom Stellard return;
55820287697STom Stellard }
55920287697STom Stellard case AMDGPUISD::FMA_W_CHAIN: {
56020287697STom Stellard SelectFMA_W_CHAIN(N);
56120287697STom Stellard return;
56220287697STom Stellard }
56320287697STom Stellard
56420287697STom Stellard case ISD::SCALAR_TO_VECTOR:
56520287697STom Stellard case ISD::BUILD_VECTOR: {
56620287697STom Stellard EVT VT = N->getValueType(0);
56720287697STom Stellard unsigned NumVectorElts = VT.getVectorNumElements();
5685a4ec812SMatt Arsenault if (VT.getScalarSizeInBits() == 16) {
5695a4ec812SMatt Arsenault if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
570e24b34e9SMatt Arsenault if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
571e24b34e9SMatt Arsenault ReplaceNode(N, Packed);
57220287697STom Stellard return;
57320287697STom Stellard }
57420287697STom Stellard }
57520287697STom Stellard
57620287697STom Stellard break;
57720287697STom Stellard }
57820287697STom Stellard
57903aa3aeeSTom Stellard assert(VT.getVectorElementType().bitsEq(MVT::i32));
580658f33dcSJay Foad unsigned RegClassID =
581658f33dcSJay Foad SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
58220287697STom Stellard SelectBuildVector(N, RegClassID);
58395927c0fSJustin Bogner return;
58445bb48eaSTom Stellard }
58545bb48eaSTom Stellard case ISD::BUILD_PAIR: {
58645bb48eaSTom Stellard SDValue RC, SubReg0, SubReg1;
58745bb48eaSTom Stellard SDLoc DL(N);
58845bb48eaSTom Stellard if (N->getValueType(0) == MVT::i128) {
58912994a70SMatt Arsenault RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
59045bb48eaSTom Stellard SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
59145bb48eaSTom Stellard SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
59245bb48eaSTom Stellard } else if (N->getValueType(0) == MVT::i64) {
59345bb48eaSTom Stellard RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
59445bb48eaSTom Stellard SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
59545bb48eaSTom Stellard SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
59645bb48eaSTom Stellard } else {
59745bb48eaSTom Stellard llvm_unreachable("Unhandled value type for BUILD_PAIR");
59845bb48eaSTom Stellard }
59945bb48eaSTom Stellard const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
60045bb48eaSTom Stellard N->getOperand(1), SubReg1 };
60195927c0fSJustin Bogner ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
60295927c0fSJustin Bogner N->getValueType(0), Ops));
60395927c0fSJustin Bogner return;
60445bb48eaSTom Stellard }
60545bb48eaSTom Stellard
60645bb48eaSTom Stellard case ISD::Constant:
60745bb48eaSTom Stellard case ISD::ConstantFP: {
60820287697STom Stellard if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
60945bb48eaSTom Stellard break;
61045bb48eaSTom Stellard
61145bb48eaSTom Stellard uint64_t Imm;
61245bb48eaSTom Stellard if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
61345bb48eaSTom Stellard Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
61445bb48eaSTom Stellard else {
61545bb48eaSTom Stellard ConstantSDNode *C = cast<ConstantSDNode>(N);
61645bb48eaSTom Stellard Imm = C->getZExtValue();
61745bb48eaSTom Stellard }
61845bb48eaSTom Stellard
61945bb48eaSTom Stellard SDLoc DL(N);
620f1c7b92aSTim Renouf ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
62195927c0fSJustin Bogner return;
62245bb48eaSTom Stellard }
62345bb48eaSTom Stellard case AMDGPUISD::BFE_I32:
62445bb48eaSTom Stellard case AMDGPUISD::BFE_U32: {
62545bb48eaSTom Stellard // There is a scalar version available, but unlike the vector version which
62645bb48eaSTom Stellard // has a separate operand for the offset and width, the scalar version packs
62745bb48eaSTom Stellard // the width and offset into a single operand. Try to move to the scalar
62845bb48eaSTom Stellard // version if the offsets are constant, so that we can try to keep extended
62945bb48eaSTom Stellard // loads of kernel arguments in SGPRs.
63045bb48eaSTom Stellard
63145bb48eaSTom Stellard // TODO: Technically we could try to pattern match scalar bitshifts of
63245bb48eaSTom Stellard // dynamic values, but it's probably not useful.
63345bb48eaSTom Stellard ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
63445bb48eaSTom Stellard if (!Offset)
63545bb48eaSTom Stellard break;
63645bb48eaSTom Stellard
63745bb48eaSTom Stellard ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
63845bb48eaSTom Stellard if (!Width)
63945bb48eaSTom Stellard break;
64045bb48eaSTom Stellard
64145bb48eaSTom Stellard bool Signed = Opc == AMDGPUISD::BFE_I32;
64245bb48eaSTom Stellard
64345bb48eaSTom Stellard uint32_t OffsetVal = Offset->getZExtValue();
64445bb48eaSTom Stellard uint32_t WidthVal = Width->getZExtValue();
64545bb48eaSTom Stellard
6460a3d755eSalex-t ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
6470a3d755eSalex-t WidthVal));
64895927c0fSJustin Bogner return;
64945bb48eaSTom Stellard }
65045bb48eaSTom Stellard case AMDGPUISD::DIV_SCALE: {
65195927c0fSJustin Bogner SelectDIV_SCALE(N);
65295927c0fSJustin Bogner return;
65345bb48eaSTom Stellard }
6544f6318feSMatt Arsenault case AMDGPUISD::MAD_I64_I32:
6554f6318feSMatt Arsenault case AMDGPUISD::MAD_U64_U32: {
6564f6318feSMatt Arsenault SelectMAD_64_32(N);
6574f6318feSMatt Arsenault return;
6584f6318feSMatt Arsenault }
659d7e03df7SJay Foad case ISD::SMUL_LOHI:
660d7e03df7SJay Foad case ISD::UMUL_LOHI:
661d7e03df7SJay Foad return SelectMUL_LOHI(N);
66245bb48eaSTom Stellard case ISD::CopyToReg: {
66345bb48eaSTom Stellard const SITargetLowering& Lowering =
66445bb48eaSTom Stellard *static_cast<const SITargetLowering*>(getTargetLowering());
6650d0d6c2fSMatt Arsenault N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
66645bb48eaSTom Stellard break;
66745bb48eaSTom Stellard }
66845bb48eaSTom Stellard case ISD::AND:
66945bb48eaSTom Stellard case ISD::SRL:
67045bb48eaSTom Stellard case ISD::SRA:
6717e8de01fSMatt Arsenault case ISD::SIGN_EXTEND_INREG:
67220287697STom Stellard if (N->getValueType(0) != MVT::i32)
67345bb48eaSTom Stellard break;
67445bb48eaSTom Stellard
67595927c0fSJustin Bogner SelectS_BFE(N);
67695927c0fSJustin Bogner return;
677bc4497b1STom Stellard case ISD::BRCOND:
67895927c0fSJustin Bogner SelectBRCOND(N);
67995927c0fSJustin Bogner return;
680d7e2303dSMatt Arsenault case ISD::FMAD:
6810084adc5SMatt Arsenault case ISD::FMA:
6820084adc5SMatt Arsenault SelectFMAD_FMA(N);
683d7e2303dSMatt Arsenault return;
684709374d1SMatt Arsenault case AMDGPUISD::CVT_PKRTZ_F16_F32:
685709374d1SMatt Arsenault case AMDGPUISD::CVT_PKNORM_I16_F32:
686709374d1SMatt Arsenault case AMDGPUISD::CVT_PKNORM_U16_F32:
687709374d1SMatt Arsenault case AMDGPUISD::CVT_PK_U16_U32:
688709374d1SMatt Arsenault case AMDGPUISD::CVT_PK_I16_I32: {
689709374d1SMatt Arsenault // Hack around using a legal type if f16 is illegal.
690709374d1SMatt Arsenault if (N->getValueType(0) == MVT::i32) {
691709374d1SMatt Arsenault MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
692709374d1SMatt Arsenault N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
693709374d1SMatt Arsenault { N->getOperand(0), N->getOperand(1) });
694709374d1SMatt Arsenault SelectCode(N);
695709374d1SMatt Arsenault return;
696709374d1SMatt Arsenault }
697cdd191d9SMatt Arsenault
698cdd191d9SMatt Arsenault break;
699cdd191d9SMatt Arsenault }
700cdd191d9SMatt Arsenault case ISD::INTRINSIC_W_CHAIN: {
701cdd191d9SMatt Arsenault SelectINTRINSIC_W_CHAIN(N);
702cdd191d9SMatt Arsenault return;
703709374d1SMatt Arsenault }
70400e89b42SCarl Ritson case ISD::INTRINSIC_WO_CHAIN: {
70500e89b42SCarl Ritson SelectINTRINSIC_WO_CHAIN(N);
70600e89b42SCarl Ritson return;
70700e89b42SCarl Ritson }
7084d55d024SMatt Arsenault case ISD::INTRINSIC_VOID: {
7094d55d024SMatt Arsenault SelectINTRINSIC_VOID(N);
7104d55d024SMatt Arsenault return;
7114d55d024SMatt Arsenault }
71245bb48eaSTom Stellard }
71345bb48eaSTom Stellard
71495927c0fSJustin Bogner SelectCode(N);
71545bb48eaSTom Stellard }
71645bb48eaSTom Stellard
isUniformBr(const SDNode * N) const717bc4497b1STom Stellard bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
718bc4497b1STom Stellard const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
71905b127daSNicolai Haehnle const Instruction *Term = BB->getTerminator();
72005b127daSNicolai Haehnle return Term->getMetadata("amdgpu.uniform") ||
72105b127daSNicolai Haehnle Term->getMetadata("structurizecfg.uniform");
722bc4497b1STom Stellard }
723bc4497b1STom Stellard
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const724078da26bSAbinav Puthan Purayil bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
725078da26bSAbinav Puthan Purayil unsigned ShAmtBits) const {
726078da26bSAbinav Puthan Purayil assert(N->getOpcode() == ISD::AND);
727078da26bSAbinav Puthan Purayil
728078da26bSAbinav Puthan Purayil const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
729078da26bSAbinav Puthan Purayil if (RHS.countTrailingOnes() >= ShAmtBits)
730078da26bSAbinav Puthan Purayil return true;
731078da26bSAbinav Puthan Purayil
732078da26bSAbinav Puthan Purayil const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
733078da26bSAbinav Puthan Purayil return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
734078da26bSAbinav Puthan Purayil }
735078da26bSAbinav Puthan Purayil
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)7360fd6a04bSMatt Arsenault static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
7370fd6a04bSMatt Arsenault SDValue &N0, SDValue &N1) {
7380fd6a04bSMatt Arsenault if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
7390fd6a04bSMatt Arsenault Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
7400fd6a04bSMatt Arsenault // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
7410fd6a04bSMatt Arsenault // (i64 (bitcast (v2i32 (build_vector
7420fd6a04bSMatt Arsenault // (or (extract_vector_elt V, 0), OFFSET),
7430fd6a04bSMatt Arsenault // (extract_vector_elt V, 1)))))
7440fd6a04bSMatt Arsenault SDValue Lo = Addr.getOperand(0).getOperand(0);
7450fd6a04bSMatt Arsenault if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
7460fd6a04bSMatt Arsenault SDValue BaseLo = Lo.getOperand(0);
7470fd6a04bSMatt Arsenault SDValue BaseHi = Addr.getOperand(0).getOperand(1);
7480fd6a04bSMatt Arsenault // Check that split base (Lo and Hi) are extracted from the same one.
7490fd6a04bSMatt Arsenault if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7500fd6a04bSMatt Arsenault BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7510fd6a04bSMatt Arsenault BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
7520fd6a04bSMatt Arsenault // Lo is statically extracted from index 0.
7530fd6a04bSMatt Arsenault isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
7540fd6a04bSMatt Arsenault BaseLo.getConstantOperandVal(1) == 0 &&
7550fd6a04bSMatt Arsenault // Hi is statically extracted from index 0.
7560fd6a04bSMatt Arsenault isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
7570fd6a04bSMatt Arsenault BaseHi.getConstantOperandVal(1) == 1) {
7580fd6a04bSMatt Arsenault N0 = BaseLo.getOperand(0).getOperand(0);
7590fd6a04bSMatt Arsenault N1 = Lo.getOperand(1);
7600fd6a04bSMatt Arsenault return true;
7610fd6a04bSMatt Arsenault }
7620fd6a04bSMatt Arsenault }
7630fd6a04bSMatt Arsenault }
7640fd6a04bSMatt Arsenault return false;
7650fd6a04bSMatt Arsenault }
7660fd6a04bSMatt Arsenault
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const7670fd6a04bSMatt Arsenault bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
7680fd6a04bSMatt Arsenault SDValue &RHS) const {
7690fd6a04bSMatt Arsenault if (CurDAG->isBaseWithConstantOffset(Addr)) {
7700fd6a04bSMatt Arsenault LHS = Addr.getOperand(0);
7710fd6a04bSMatt Arsenault RHS = Addr.getOperand(1);
7720fd6a04bSMatt Arsenault return true;
7730fd6a04bSMatt Arsenault }
7740fd6a04bSMatt Arsenault
7750fd6a04bSMatt Arsenault if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
7760fd6a04bSMatt Arsenault assert(LHS && RHS && isa<ConstantSDNode>(RHS));
7770fd6a04bSMatt Arsenault return true;
7780fd6a04bSMatt Arsenault }
7790fd6a04bSMatt Arsenault
7800fd6a04bSMatt Arsenault return false;
7810fd6a04bSMatt Arsenault }
7820fd6a04bSMatt Arsenault
getPassName() const783117296c0SMehdi Amini StringRef AMDGPUDAGToDAGISel::getPassName() const {
78445bb48eaSTom Stellard return "AMDGPU DAG->DAG Pattern Instruction Selection";
78545bb48eaSTom Stellard }
78645bb48eaSTom Stellard
78745bb48eaSTom Stellard //===----------------------------------------------------------------------===//
78845bb48eaSTom Stellard // Complex Patterns
78945bb48eaSTom Stellard //===----------------------------------------------------------------------===//
79045bb48eaSTom Stellard
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)79145bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
79245bb48eaSTom Stellard SDValue &Offset) {
79320287697STom Stellard return false;
79445bb48eaSTom Stellard }
79545bb48eaSTom Stellard
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)79645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
79745bb48eaSTom Stellard SDValue &Offset) {
79845bb48eaSTom Stellard ConstantSDNode *C;
79945bb48eaSTom Stellard SDLoc DL(Addr);
80045bb48eaSTom Stellard
80145bb48eaSTom Stellard if ((C = dyn_cast<ConstantSDNode>(Addr))) {
802c5a154dbSTom Stellard Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
80345bb48eaSTom Stellard Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80406200bd7SJan Vesely } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
80506200bd7SJan Vesely (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
806c5a154dbSTom Stellard Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
80706200bd7SJan Vesely Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80845bb48eaSTom Stellard } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
80945bb48eaSTom Stellard (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
81045bb48eaSTom Stellard Base = Addr.getOperand(0);
81145bb48eaSTom Stellard Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
81245bb48eaSTom Stellard } else {
81345bb48eaSTom Stellard Base = Addr;
81445bb48eaSTom Stellard Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
81545bb48eaSTom Stellard }
81645bb48eaSTom Stellard
81745bb48eaSTom Stellard return true;
81845bb48eaSTom Stellard }
81945bb48eaSTom Stellard
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const8207cd57dcdSMatt Arsenault SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
8217cd57dcdSMatt Arsenault const SDLoc &DL) const {
8227cd57dcdSMatt Arsenault SDNode *Mov = CurDAG->getMachineNode(
8237cd57dcdSMatt Arsenault AMDGPU::S_MOV_B32, DL, MVT::i32,
8247cd57dcdSMatt Arsenault CurDAG->getTargetConstant(Val, DL, MVT::i32));
8257cd57dcdSMatt Arsenault return SDValue(Mov, 0);
8267cd57dcdSMatt Arsenault }
8277cd57dcdSMatt Arsenault
82884445dd1SMatt Arsenault // FIXME: Should only handle addcarry/subcarry
SelectADD_SUB_I64(SDNode * N)82995927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
83045bb48eaSTom Stellard SDLoc DL(N);
83145bb48eaSTom Stellard SDValue LHS = N->getOperand(0);
83245bb48eaSTom Stellard SDValue RHS = N->getOperand(1);
83345bb48eaSTom Stellard
83467624af0SNicolai Haehnle unsigned Opcode = N->getOpcode();
83567624af0SNicolai Haehnle bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
83667624af0SNicolai Haehnle bool ProduceCarry =
83767624af0SNicolai Haehnle ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
83884445dd1SMatt Arsenault bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
83945bb48eaSTom Stellard
84045bb48eaSTom Stellard SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
84145bb48eaSTom Stellard SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
84245bb48eaSTom Stellard
84345bb48eaSTom Stellard SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84445bb48eaSTom Stellard DL, MVT::i32, LHS, Sub0);
84545bb48eaSTom Stellard SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84645bb48eaSTom Stellard DL, MVT::i32, LHS, Sub1);
84745bb48eaSTom Stellard
84845bb48eaSTom Stellard SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
84945bb48eaSTom Stellard DL, MVT::i32, RHS, Sub0);
85045bb48eaSTom Stellard SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
85145bb48eaSTom Stellard DL, MVT::i32, RHS, Sub1);
85245bb48eaSTom Stellard
85345bb48eaSTom Stellard SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
85445bb48eaSTom Stellard
8556e34e718Salex-t static const unsigned OpcMap[2][2][2] = {
8566e34e718Salex-t {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
85779f67caeSMatt Arsenault {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
8586e34e718Salex-t {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
8596e34e718Salex-t {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
8606e34e718Salex-t
8616e34e718Salex-t unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
8626e34e718Salex-t unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
86345bb48eaSTom Stellard
86467624af0SNicolai Haehnle SDNode *AddLo;
86567624af0SNicolai Haehnle if (!ConsumeCarry) {
86667624af0SNicolai Haehnle SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
86767624af0SNicolai Haehnle AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
86867624af0SNicolai Haehnle } else {
86967624af0SNicolai Haehnle SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
87067624af0SNicolai Haehnle AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
87167624af0SNicolai Haehnle }
87267624af0SNicolai Haehnle SDValue AddHiArgs[] = {
87367624af0SNicolai Haehnle SDValue(Hi0, 0),
87467624af0SNicolai Haehnle SDValue(Hi1, 0),
87567624af0SNicolai Haehnle SDValue(AddLo, 1)
87667624af0SNicolai Haehnle };
87767624af0SNicolai Haehnle SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
87845bb48eaSTom Stellard
87967624af0SNicolai Haehnle SDValue RegSequenceArgs[] = {
88045bb48eaSTom Stellard CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
88145bb48eaSTom Stellard SDValue(AddLo,0),
88245bb48eaSTom Stellard Sub0,
88345bb48eaSTom Stellard SDValue(AddHi,0),
88445bb48eaSTom Stellard Sub1,
88545bb48eaSTom Stellard };
88667624af0SNicolai Haehnle SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
88767624af0SNicolai Haehnle MVT::i64, RegSequenceArgs);
88867624af0SNicolai Haehnle
88967624af0SNicolai Haehnle if (ProduceCarry) {
89067624af0SNicolai Haehnle // Replace the carry-use
8913264c1bdSNirav Dave ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
89267624af0SNicolai Haehnle }
89367624af0SNicolai Haehnle
89467624af0SNicolai Haehnle // Replace the remaining uses.
8953264c1bdSNirav Dave ReplaceNode(N, RegSequence);
89645bb48eaSTom Stellard }
89745bb48eaSTom Stellard
SelectAddcSubb(SDNode * N)8988f3da70eSStanislav Mekhanoshin void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
8998f3da70eSStanislav Mekhanoshin SDLoc DL(N);
9008f3da70eSStanislav Mekhanoshin SDValue LHS = N->getOperand(0);
9018f3da70eSStanislav Mekhanoshin SDValue RHS = N->getOperand(1);
9028f3da70eSStanislav Mekhanoshin SDValue CI = N->getOperand(2);
9038f3da70eSStanislav Mekhanoshin
9045b898bddSalex-t if (N->isDivergent()) {
9058f3da70eSStanislav Mekhanoshin unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
9068f3da70eSStanislav Mekhanoshin : AMDGPU::V_SUBB_U32_e64;
9078f3da70eSStanislav Mekhanoshin CurDAG->SelectNodeTo(
9088f3da70eSStanislav Mekhanoshin N, Opc, N->getVTList(),
9095b898bddSalex-t {LHS, RHS, CI,
9105b898bddSalex-t CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9115b898bddSalex-t } else {
9125b898bddSalex-t unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
9135b898bddSalex-t : AMDGPU::S_SUB_CO_PSEUDO;
9145b898bddSalex-t CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
9155b898bddSalex-t }
9168f3da70eSStanislav Mekhanoshin }
9178f3da70eSStanislav Mekhanoshin
SelectUADDO_USUBO(SDNode * N)918ee3f0acfSMatt Arsenault void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
919ee3f0acfSMatt Arsenault // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
920ee3f0acfSMatt Arsenault // carry out despite the _i32 name. These were renamed in VI to _U32.
921ee3f0acfSMatt Arsenault // FIXME: We should probably rename the opcodes here.
9225b898bddSalex-t bool IsAdd = N->getOpcode() == ISD::UADDO;
9235b898bddSalex-t bool IsVALU = N->isDivergent();
9245b898bddSalex-t
9255b898bddSalex-t for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
9265b898bddSalex-t ++UI)
9275b898bddSalex-t if (UI.getUse().getResNo() == 1) {
9285b898bddSalex-t if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
9295b898bddSalex-t (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
9305b898bddSalex-t IsVALU = true;
9315b898bddSalex-t break;
9325b898bddSalex-t }
9335b898bddSalex-t }
9345b898bddSalex-t
9355b898bddSalex-t if (IsVALU) {
93679f67caeSMatt Arsenault unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
937ee3f0acfSMatt Arsenault
938eea5177dSMichael Liao CurDAG->SelectNodeTo(
939eea5177dSMichael Liao N, Opc, N->getVTList(),
940cfdfba99STim Renouf {N->getOperand(0), N->getOperand(1),
941eea5177dSMichael Liao CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
9425b898bddSalex-t } else {
9435b898bddSalex-t unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
9445b898bddSalex-t : AMDGPU::S_USUBO_PSEUDO;
9455b898bddSalex-t
9465b898bddSalex-t CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
9475b898bddSalex-t {N->getOperand(0), N->getOperand(1)});
9485b898bddSalex-t }
949ee3f0acfSMatt Arsenault }
950ee3f0acfSMatt Arsenault
SelectFMA_W_CHAIN(SDNode * N)9518485fa09STom Stellard void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
9528485fa09STom Stellard SDLoc SL(N);
9538485fa09STom Stellard // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
9548485fa09STom Stellard SDValue Ops[10];
9558485fa09STom Stellard
9568485fa09STom Stellard SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
9578485fa09STom Stellard SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9588485fa09STom Stellard SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
9598485fa09STom Stellard Ops[8] = N->getOperand(0);
9608485fa09STom Stellard Ops[9] = N->getOperand(4);
9618485fa09STom Stellard
962598bebeaSJay Foad // If there are no source modifiers, prefer fmac over fma because it can use
963598bebeaSJay Foad // the smaller VOP2 encoding.
964598bebeaSJay Foad bool UseFMAC = Subtarget->hasDLInsts() &&
965598bebeaSJay Foad cast<ConstantSDNode>(Ops[0])->isZero() &&
966598bebeaSJay Foad cast<ConstantSDNode>(Ops[2])->isZero() &&
967598bebeaSJay Foad cast<ConstantSDNode>(Ops[4])->isZero();
968598bebeaSJay Foad unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
969598bebeaSJay Foad CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
9708485fa09STom Stellard }
9718485fa09STom Stellard
SelectFMUL_W_CHAIN(SDNode * N)9728485fa09STom Stellard void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
9738485fa09STom Stellard SDLoc SL(N);
9748485fa09STom Stellard // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
9758485fa09STom Stellard SDValue Ops[8];
9768485fa09STom Stellard
9778485fa09STom Stellard SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
9788485fa09STom Stellard SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
9798485fa09STom Stellard Ops[6] = N->getOperand(0);
9808485fa09STom Stellard Ops[7] = N->getOperand(3);
9818485fa09STom Stellard
9828485fa09STom Stellard CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
9838485fa09STom Stellard }
9848485fa09STom Stellard
98545bb48eaSTom Stellard // We need to handle this here because tablegen doesn't support matching
98645bb48eaSTom Stellard // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)98795927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
98845bb48eaSTom Stellard SDLoc SL(N);
98945bb48eaSTom Stellard EVT VT = N->getValueType(0);
99045bb48eaSTom Stellard
99145bb48eaSTom Stellard assert(VT == MVT::f32 || VT == MVT::f64);
99245bb48eaSTom Stellard
99345bb48eaSTom Stellard unsigned Opc
994314e29edSJoe Nash = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
99545bb48eaSTom Stellard
9965b91a6a8SJay Foad // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
9975b91a6a8SJay Foad // omod
9985b91a6a8SJay Foad SDValue Ops[8];
9995b91a6a8SJay Foad SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
10005b91a6a8SJay Foad SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
10015b91a6a8SJay Foad SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
10023b99f12aSMatt Arsenault CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
100345bb48eaSTom Stellard }
100445bb48eaSTom Stellard
10054f6318feSMatt Arsenault // We need to handle this here because tablegen doesn't support matching
10064f6318feSMatt Arsenault // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)10074f6318feSMatt Arsenault void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
10084f6318feSMatt Arsenault SDLoc SL(N);
10094f6318feSMatt Arsenault bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
1010d943c514SJay Foad unsigned Opc;
1011d943c514SJay Foad if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
1012d943c514SJay Foad Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1013d943c514SJay Foad : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1014d943c514SJay Foad else
1015d943c514SJay Foad Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
10164f6318feSMatt Arsenault
10174f6318feSMatt Arsenault SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
10184f6318feSMatt Arsenault SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
10194f6318feSMatt Arsenault Clamp };
10204f6318feSMatt Arsenault CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
10214f6318feSMatt Arsenault }
10224f6318feSMatt Arsenault
1023d7e03df7SJay Foad // We need to handle this here because tablegen doesn't support matching
1024d7e03df7SJay Foad // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)1025d7e03df7SJay Foad void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1026d7e03df7SJay Foad SDLoc SL(N);
1027d7e03df7SJay Foad bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1028d943c514SJay Foad unsigned Opc;
1029d943c514SJay Foad if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
1030d943c514SJay Foad Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1031d943c514SJay Foad : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1032d943c514SJay Foad else
1033d943c514SJay Foad Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1034d7e03df7SJay Foad
1035d7e03df7SJay Foad SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
1036d7e03df7SJay Foad SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1037d7e03df7SJay Foad SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1038d7e03df7SJay Foad SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1039d7e03df7SJay Foad if (!SDValue(N, 0).use_empty()) {
1040d7e03df7SJay Foad SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1041d7e03df7SJay Foad SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1042d7e03df7SJay Foad MVT::i32, SDValue(Mad, 0), Sub0);
1043d7e03df7SJay Foad ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1044d7e03df7SJay Foad }
1045d7e03df7SJay Foad if (!SDValue(N, 1).use_empty()) {
1046d7e03df7SJay Foad SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1047d7e03df7SJay Foad SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1048d7e03df7SJay Foad MVT::i32, SDValue(Mad, 0), Sub1);
1049d7e03df7SJay Foad ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1050d7e03df7SJay Foad }
1051d7e03df7SJay Foad CurDAG->RemoveDeadNode(N);
1052d7e03df7SJay Foad }
1053d7e03df7SJay Foad
isDSOffsetLegal(SDValue Base,unsigned Offset) const1054040c5027SJay Foad bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1055040c5027SJay Foad if (!isUInt<16>(Offset))
105645bb48eaSTom Stellard return false;
105745bb48eaSTom Stellard
1058040c5027SJay Foad if (!Base || Subtarget->hasUsableDSOffset() ||
1059706f930bSMatt Arsenault Subtarget->unsafeDSOffsetFoldingEnabled())
106045bb48eaSTom Stellard return true;
106145bb48eaSTom Stellard
106245bb48eaSTom Stellard // On Southern Islands instruction with a negative base value and an offset
106345bb48eaSTom Stellard // don't seem to work.
106445bb48eaSTom Stellard return CurDAG->SignBitIsZero(Base);
106545bb48eaSTom Stellard }
106645bb48eaSTom Stellard
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const106745bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
106845bb48eaSTom Stellard SDValue &Offset) const {
106992b24f32STom Stellard SDLoc DL(Addr);
107045bb48eaSTom Stellard if (CurDAG->isBaseWithConstantOffset(Addr)) {
107145bb48eaSTom Stellard SDValue N0 = Addr.getOperand(0);
107245bb48eaSTom Stellard SDValue N1 = Addr.getOperand(1);
107345bb48eaSTom Stellard ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1074040c5027SJay Foad if (isDSOffsetLegal(N0, C1->getSExtValue())) {
107545bb48eaSTom Stellard // (add n0, c0)
107645bb48eaSTom Stellard Base = N0;
107792b24f32STom Stellard Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
107845bb48eaSTom Stellard return true;
107945bb48eaSTom Stellard }
1080966a94f8SMatt Arsenault } else if (Addr.getOpcode() == ISD::SUB) {
1081966a94f8SMatt Arsenault // sub C, x -> add (sub 0, x), C
1082966a94f8SMatt Arsenault if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1083966a94f8SMatt Arsenault int64_t ByteOffset = C->getSExtValue();
1084040c5027SJay Foad if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1085966a94f8SMatt Arsenault SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
108645bb48eaSTom Stellard
1087966a94f8SMatt Arsenault // XXX - This is kind of hacky. Create a dummy sub node so we can check
1088966a94f8SMatt Arsenault // the known bits in isDSOffsetLegal. We need to emit the selected node
1089966a94f8SMatt Arsenault // here, so this is thrown away.
1090966a94f8SMatt Arsenault SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
1091966a94f8SMatt Arsenault Zero, Addr.getOperand(1));
1092966a94f8SMatt Arsenault
1093040c5027SJay Foad if (isDSOffsetLegal(Sub, ByteOffset)) {
1094cfdfba99STim Renouf SmallVector<SDValue, 3> Opnds;
1095cfdfba99STim Renouf Opnds.push_back(Zero);
1096cfdfba99STim Renouf Opnds.push_back(Addr.getOperand(1));
109784445dd1SMatt Arsenault
1098cfdfba99STim Renouf // FIXME: Select to VOP3 version for with-carry.
109979f67caeSMatt Arsenault unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1100cfdfba99STim Renouf if (Subtarget->hasAddNoCarry()) {
1101cfdfba99STim Renouf SubOp = AMDGPU::V_SUB_U32_e64;
1102eea5177dSMichael Liao Opnds.push_back(
1103eea5177dSMichael Liao CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1104cfdfba99STim Renouf }
1105cfdfba99STim Renouf
1106cfdfba99STim Renouf MachineSDNode *MachineSub =
1107cfdfba99STim Renouf CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1108966a94f8SMatt Arsenault
1109966a94f8SMatt Arsenault Base = SDValue(MachineSub, 0);
111026a2ab74STom Stellard Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1111966a94f8SMatt Arsenault return true;
1112966a94f8SMatt Arsenault }
1113966a94f8SMatt Arsenault }
1114966a94f8SMatt Arsenault }
1115966a94f8SMatt Arsenault } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
111645bb48eaSTom Stellard // If we have a constant address, prefer to put the constant into the
111745bb48eaSTom Stellard // offset. This can save moves to load the constant address since multiple
111845bb48eaSTom Stellard // operations can share the zero base address register, and enables merging
111945bb48eaSTom Stellard // into read2 / write2 instructions.
1120966a94f8SMatt Arsenault
1121966a94f8SMatt Arsenault SDLoc DL(Addr);
1122966a94f8SMatt Arsenault
1123040c5027SJay Foad if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
112445bb48eaSTom Stellard SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
112545bb48eaSTom Stellard MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
112645bb48eaSTom Stellard DL, MVT::i32, Zero);
112745bb48eaSTom Stellard Base = SDValue(MovZero, 0);
112826a2ab74STom Stellard Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
112945bb48eaSTom Stellard return true;
113045bb48eaSTom Stellard }
113145bb48eaSTom Stellard }
113245bb48eaSTom Stellard
113345bb48eaSTom Stellard // default case
113445bb48eaSTom Stellard Base = Addr;
1135966a94f8SMatt Arsenault Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
113645bb48eaSTom Stellard return true;
113745bb48eaSTom Stellard }
113845bb48eaSTom Stellard
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const1139040c5027SJay Foad bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1140040c5027SJay Foad unsigned Offset1,
1141040c5027SJay Foad unsigned Size) const {
1142040c5027SJay Foad if (Offset0 % Size != 0 || Offset1 % Size != 0)
1143040c5027SJay Foad return false;
1144040c5027SJay Foad if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
1145040c5027SJay Foad return false;
1146040c5027SJay Foad
1147040c5027SJay Foad if (!Base || Subtarget->hasUsableDSOffset() ||
1148040c5027SJay Foad Subtarget->unsafeDSOffsetFoldingEnabled())
1149040c5027SJay Foad return true;
1150040c5027SJay Foad
1151040c5027SJay Foad // On Southern Islands instruction with a negative base value and an offset
1152040c5027SJay Foad // don't seem to work.
1153040c5027SJay Foad return CurDAG->SignBitIsZero(Base);
1154040c5027SJay Foad }
1155040c5027SJay Foad
1156966a94f8SMatt Arsenault // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const115745bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
115845bb48eaSTom Stellard SDValue &Offset0,
115945bb48eaSTom Stellard SDValue &Offset1) const {
1160040c5027SJay Foad return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1161d17ea67bSMirko Brkusanin }
1162d17ea67bSMirko Brkusanin
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const1163d17ea67bSMirko Brkusanin bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1164d17ea67bSMirko Brkusanin SDValue &Offset0,
1165d17ea67bSMirko Brkusanin SDValue &Offset1) const {
1166040c5027SJay Foad return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1167d17ea67bSMirko Brkusanin }
1168d17ea67bSMirko Brkusanin
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const1169d17ea67bSMirko Brkusanin bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1170d17ea67bSMirko Brkusanin SDValue &Offset0, SDValue &Offset1,
1171040c5027SJay Foad unsigned Size) const {
117245bb48eaSTom Stellard SDLoc DL(Addr);
117345bb48eaSTom Stellard
117445bb48eaSTom Stellard if (CurDAG->isBaseWithConstantOffset(Addr)) {
117545bb48eaSTom Stellard SDValue N0 = Addr.getOperand(0);
117645bb48eaSTom Stellard SDValue N1 = Addr.getOperand(1);
117745bb48eaSTom Stellard ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1178040c5027SJay Foad unsigned OffsetValue0 = C1->getZExtValue();
1179040c5027SJay Foad unsigned OffsetValue1 = OffsetValue0 + Size;
1180040c5027SJay Foad
118145bb48eaSTom Stellard // (add n0, c0)
1182040c5027SJay Foad if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
118345bb48eaSTom Stellard Base = N0;
1184040c5027SJay Foad Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1185040c5027SJay Foad Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
118645bb48eaSTom Stellard return true;
118745bb48eaSTom Stellard }
1188966a94f8SMatt Arsenault } else if (Addr.getOpcode() == ISD::SUB) {
1189966a94f8SMatt Arsenault // sub C, x -> add (sub 0, x), C
1190d17ea67bSMirko Brkusanin if (const ConstantSDNode *C =
1191d17ea67bSMirko Brkusanin dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
1192040c5027SJay Foad unsigned OffsetValue0 = C->getZExtValue();
1193040c5027SJay Foad unsigned OffsetValue1 = OffsetValue0 + Size;
119445bb48eaSTom Stellard
1195040c5027SJay Foad if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1196966a94f8SMatt Arsenault SDLoc DL(Addr);
1197966a94f8SMatt Arsenault SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1198966a94f8SMatt Arsenault
1199966a94f8SMatt Arsenault // XXX - This is kind of hacky. Create a dummy sub node so we can check
1200966a94f8SMatt Arsenault // the known bits in isDSOffsetLegal. We need to emit the selected node
1201966a94f8SMatt Arsenault // here, so this is thrown away.
1202d17ea67bSMirko Brkusanin SDValue Sub =
1203d17ea67bSMirko Brkusanin CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
1204966a94f8SMatt Arsenault
1205040c5027SJay Foad if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1206cfdfba99STim Renouf SmallVector<SDValue, 3> Opnds;
1207cfdfba99STim Renouf Opnds.push_back(Zero);
1208cfdfba99STim Renouf Opnds.push_back(Addr.getOperand(1));
120979f67caeSMatt Arsenault unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1210cfdfba99STim Renouf if (Subtarget->hasAddNoCarry()) {
1211cfdfba99STim Renouf SubOp = AMDGPU::V_SUB_U32_e64;
1212eea5177dSMichael Liao Opnds.push_back(
1213eea5177dSMichael Liao CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
1214cfdfba99STim Renouf }
121584445dd1SMatt Arsenault
1216d17ea67bSMirko Brkusanin MachineSDNode *MachineSub = CurDAG->getMachineNode(
1217040c5027SJay Foad SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
1218966a94f8SMatt Arsenault
1219966a94f8SMatt Arsenault Base = SDValue(MachineSub, 0);
1220040c5027SJay Foad Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1221040c5027SJay Foad Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
1222966a94f8SMatt Arsenault return true;
1223966a94f8SMatt Arsenault }
1224966a94f8SMatt Arsenault }
1225966a94f8SMatt Arsenault }
1226966a94f8SMatt Arsenault } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1227040c5027SJay Foad unsigned OffsetValue0 = CAddr->getZExtValue();
1228040c5027SJay Foad unsigned OffsetValue1 = OffsetValue0 + Size;
122945bb48eaSTom Stellard
1230040c5027SJay Foad if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
123145bb48eaSTom Stellard SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
1232d17ea67bSMirko Brkusanin MachineSDNode *MovZero =
1233d17ea67bSMirko Brkusanin CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
123445bb48eaSTom Stellard Base = SDValue(MovZero, 0);
1235040c5027SJay Foad Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
1236040c5027SJay Foad Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
123745bb48eaSTom Stellard return true;
123845bb48eaSTom Stellard }
123945bb48eaSTom Stellard }
124045bb48eaSTom Stellard
124145bb48eaSTom Stellard // default case
12420efdd06bSMatt Arsenault
124345bb48eaSTom Stellard Base = Addr;
124445bb48eaSTom Stellard Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
124545bb48eaSTom Stellard Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
124645bb48eaSTom Stellard return true;
124745bb48eaSTom Stellard }
124845bb48eaSTom Stellard
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const12493bffb1cdSStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
12503bffb1cdSStanislav Mekhanoshin SDValue &SOffset, SDValue &Offset,
12513bffb1cdSStanislav Mekhanoshin SDValue &Offen, SDValue &Idxen,
1252edd6da10SStanislav Mekhanoshin SDValue &Addr64) const {
1253b41574a9SChangpeng Fang // Subtarget prefers to use flat instruction
1254fdaad485SMatt Arsenault // FIXME: This should be a pattern predicate and not reach here
1255b41574a9SChangpeng Fang if (Subtarget->useFlatForGlobal())
1256b41574a9SChangpeng Fang return false;
1257b41574a9SChangpeng Fang
125845bb48eaSTom Stellard SDLoc DL(Addr);
125945bb48eaSTom Stellard
126045bb48eaSTom Stellard Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
126145bb48eaSTom Stellard Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
126245bb48eaSTom Stellard Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
126345bb48eaSTom Stellard SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
126445bb48eaSTom Stellard
1265f1c7b92aSTim Renouf ConstantSDNode *C1 = nullptr;
1266f1c7b92aSTim Renouf SDValue N0 = Addr;
126745bb48eaSTom Stellard if (CurDAG->isBaseWithConstantOffset(Addr)) {
1268f1c7b92aSTim Renouf C1 = cast<ConstantSDNode>(Addr.getOperand(1));
1269f1c7b92aSTim Renouf if (isUInt<32>(C1->getZExtValue()))
1270f1c7b92aSTim Renouf N0 = Addr.getOperand(0);
1271f1c7b92aSTim Renouf else
1272f1c7b92aSTim Renouf C1 = nullptr;
1273f1c7b92aSTim Renouf }
127445bb48eaSTom Stellard
127545bb48eaSTom Stellard if (N0.getOpcode() == ISD::ADD) {
1276f1c7b92aSTim Renouf // (add N2, N3) -> addr64, or
127745bb48eaSTom Stellard // (add (add N2, N3), C1) -> addr64
127845bb48eaSTom Stellard SDValue N2 = N0.getOperand(0);
127945bb48eaSTom Stellard SDValue N3 = N0.getOperand(1);
128045bb48eaSTom Stellard Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1281f1c7b92aSTim Renouf
1282f1c7b92aSTim Renouf if (N2->isDivergent()) {
1283f1c7b92aSTim Renouf if (N3->isDivergent()) {
1284f1c7b92aSTim Renouf // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
1285f1c7b92aSTim Renouf // addr64, and construct the resource from a 0 address.
1286f1c7b92aSTim Renouf Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1287f1c7b92aSTim Renouf VAddr = N0;
1288f1c7b92aSTim Renouf } else {
1289f1c7b92aSTim Renouf // N2 is divergent, N3 is not.
1290f1c7b92aSTim Renouf Ptr = N3;
1291f1c7b92aSTim Renouf VAddr = N2;
1292f1c7b92aSTim Renouf }
1293f1c7b92aSTim Renouf } else {
1294f1c7b92aSTim Renouf // N2 is not divergent.
129545bb48eaSTom Stellard Ptr = N2;
129645bb48eaSTom Stellard VAddr = N3;
1297f1c7b92aSTim Renouf }
1298f1c7b92aSTim Renouf Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1299f1c7b92aSTim Renouf } else if (N0->isDivergent()) {
1300f1c7b92aSTim Renouf // N0 is divergent. Use it as the addr64, and construct the resource from a
1301f1c7b92aSTim Renouf // 0 address.
1302f1c7b92aSTim Renouf Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1303f1c7b92aSTim Renouf VAddr = N0;
1304f1c7b92aSTim Renouf Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
130545bb48eaSTom Stellard } else {
1306f1c7b92aSTim Renouf // N0 -> offset, or
1307f1c7b92aSTim Renouf // (N0 + C1) -> offset
130845bb48eaSTom Stellard VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
130945bb48eaSTom Stellard Ptr = N0;
131045bb48eaSTom Stellard }
131145bb48eaSTom Stellard
1312f1c7b92aSTim Renouf if (!C1) {
1313f1c7b92aSTim Renouf // No offset.
1314f1c7b92aSTim Renouf Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1315f1c7b92aSTim Renouf return true;
1316f1c7b92aSTim Renouf }
1317f1c7b92aSTim Renouf
1318ffadcb74SMarek Olsak if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
1319f1c7b92aSTim Renouf // Legal offset for instruction.
132045bb48eaSTom Stellard Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1321b41574a9SChangpeng Fang return true;
132288701817SMatt Arsenault }
132388701817SMatt Arsenault
132445bb48eaSTom Stellard // Illegal offset, store it in soffset.
132545bb48eaSTom Stellard Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1326f1c7b92aSTim Renouf SOffset =
1327f1c7b92aSTim Renouf SDValue(CurDAG->getMachineNode(
1328f1c7b92aSTim Renouf AMDGPU::S_MOV_B32, DL, MVT::i32,
132945bb48eaSTom Stellard CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
133045bb48eaSTom Stellard 0);
1331b41574a9SChangpeng Fang return true;
133245bb48eaSTom Stellard }
133345bb48eaSTom Stellard
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const133445bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
133545bb48eaSTom Stellard SDValue &VAddr, SDValue &SOffset,
1336edd6da10SStanislav Mekhanoshin SDValue &Offset) const {
133745bb48eaSTom Stellard SDValue Ptr, Offen, Idxen, Addr64;
133845bb48eaSTom Stellard
133970580f83STom Stellard // addr64 bit was removed for volcanic islands.
1340fdaad485SMatt Arsenault // FIXME: This should be a pattern predicate and not reach here
1341e4c2e9b0SMatt Arsenault if (!Subtarget->hasAddr64())
134270580f83STom Stellard return false;
134370580f83STom Stellard
1344edd6da10SStanislav Mekhanoshin if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1345b41574a9SChangpeng Fang return false;
134645bb48eaSTom Stellard
134745bb48eaSTom Stellard ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
134845bb48eaSTom Stellard if (C->getSExtValue()) {
134945bb48eaSTom Stellard SDLoc DL(Addr);
135045bb48eaSTom Stellard
135145bb48eaSTom Stellard const SITargetLowering& Lowering =
135245bb48eaSTom Stellard *static_cast<const SITargetLowering*>(getTargetLowering());
135345bb48eaSTom Stellard
135445bb48eaSTom Stellard SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
135545bb48eaSTom Stellard return true;
135645bb48eaSTom Stellard }
135745bb48eaSTom Stellard
135845bb48eaSTom Stellard return false;
135945bb48eaSTom Stellard }
136045bb48eaSTom Stellard
foldFrameIndex(SDValue N) const1361156d3ae0SMatt Arsenault std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
136260b1967cSScott Linder SDLoc DL(N);
1363156d3ae0SMatt Arsenault
13645a061041SChristudasan Devadasan auto *FI = dyn_cast<FrameIndexSDNode>(N);
13655a061041SChristudasan Devadasan SDValue TFI =
13665a061041SChristudasan Devadasan FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1367156d3ae0SMatt Arsenault
13685a061041SChristudasan Devadasan // We rebase the base address into an absolute stack address and hence
1369ff8a1caeSChristudasan Devadasan // use constant 0 for soffset. This value must be retained until
1370ff8a1caeSChristudasan Devadasan // frame elimination and eliminateFrameIndex will choose the appropriate
1371ff8a1caeSChristudasan Devadasan // frame register if need be.
13725a061041SChristudasan Devadasan return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1373156d3ae0SMatt Arsenault }
1374156d3ae0SMatt Arsenault
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const1375b81495dcSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1376156d3ae0SMatt Arsenault SDValue Addr, SDValue &Rsrc,
137745bb48eaSTom Stellard SDValue &VAddr, SDValue &SOffset,
137845bb48eaSTom Stellard SDValue &ImmOffset) const {
137945bb48eaSTom Stellard
138045bb48eaSTom Stellard SDLoc DL(Addr);
138145bb48eaSTom Stellard MachineFunction &MF = CurDAG->getMachineFunction();
13820e3d3893SMatt Arsenault const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
138345bb48eaSTom Stellard
13840e3d3893SMatt Arsenault Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
138545bb48eaSTom Stellard
13860774ea26SMatt Arsenault if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1387bb10fa3aSMatt Arsenault int64_t Imm = CAddr->getSExtValue();
1388bb10fa3aSMatt Arsenault const int64_t NullPtr =
1389bb10fa3aSMatt Arsenault AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1390bb10fa3aSMatt Arsenault // Don't fold null pointer.
1391bb10fa3aSMatt Arsenault if (Imm != NullPtr) {
13920774ea26SMatt Arsenault SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1393bb10fa3aSMatt Arsenault MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1394bb10fa3aSMatt Arsenault AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
13950774ea26SMatt Arsenault VAddr = SDValue(MovHighBits, 0);
1396156d3ae0SMatt Arsenault
1397690f5b7aSSebastian Neubauer SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
13980774ea26SMatt Arsenault ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
13990774ea26SMatt Arsenault return true;
14000774ea26SMatt Arsenault }
1401bb10fa3aSMatt Arsenault }
14020774ea26SMatt Arsenault
140345bb48eaSTom Stellard if (CurDAG->isBaseWithConstantOffset(Addr)) {
14040774ea26SMatt Arsenault // (add n0, c1)
14050774ea26SMatt Arsenault
140678655fcfSTom Stellard SDValue N0 = Addr.getOperand(0);
140745bb48eaSTom Stellard SDValue N1 = Addr.getOperand(1);
1408cd09961fSMatt Arsenault
1409caf0ed4dSMatt Arsenault // Offsets in vaddr must be positive if range checking is enabled.
141045b98189SMatt Arsenault //
1411caf0ed4dSMatt Arsenault // The total computation of vaddr + soffset + offset must not overflow. If
1412caf0ed4dSMatt Arsenault // vaddr is negative, even if offset is 0 the sgpr offset add will end up
141345b98189SMatt Arsenault // overflowing.
1414caf0ed4dSMatt Arsenault //
1415caf0ed4dSMatt Arsenault // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
1416caf0ed4dSMatt Arsenault // always perform a range check. If a negative vaddr base index was used,
1417caf0ed4dSMatt Arsenault // this would fail the range check. The overall address computation would
1418caf0ed4dSMatt Arsenault // compute a valid address, but this doesn't happen due to the range
1419caf0ed4dSMatt Arsenault // check. For out-of-bounds MUBUF loads, a 0 is returned.
1420caf0ed4dSMatt Arsenault //
1421caf0ed4dSMatt Arsenault // Therefore it should be safe to fold any VGPR offset on gfx9 into the
1422caf0ed4dSMatt Arsenault // MUBUF vaddr, but not on older subtargets which can only do this if the
1423caf0ed4dSMatt Arsenault // sign bit is known 0.
142445bb48eaSTom Stellard ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
142545b98189SMatt Arsenault if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
1426caf0ed4dSMatt Arsenault (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1427caf0ed4dSMatt Arsenault CurDAG->SignBitIsZero(N0))) {
1428156d3ae0SMatt Arsenault std::tie(VAddr, SOffset) = foldFrameIndex(N0);
142945bb48eaSTom Stellard ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
143045bb48eaSTom Stellard return true;
143145bb48eaSTom Stellard }
143245bb48eaSTom Stellard }
143345bb48eaSTom Stellard
143445bb48eaSTom Stellard // (node)
1435156d3ae0SMatt Arsenault std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
143645bb48eaSTom Stellard ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
143745bb48eaSTom Stellard return true;
143845bb48eaSTom Stellard }
143945bb48eaSTom Stellard
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)1440690f5b7aSSebastian Neubauer static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
1441690f5b7aSSebastian Neubauer if (Val.getOpcode() != ISD::CopyFromReg)
1442690f5b7aSSebastian Neubauer return false;
1443690f5b7aSSebastian Neubauer auto RC =
1444690f5b7aSSebastian Neubauer TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
1445690f5b7aSSebastian Neubauer return RC && TRI.isSGPRClass(RC);
1446690f5b7aSSebastian Neubauer }
1447690f5b7aSSebastian Neubauer
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const1448b81495dcSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1449156d3ae0SMatt Arsenault SDValue Addr,
14500774ea26SMatt Arsenault SDValue &SRsrc,
14510774ea26SMatt Arsenault SDValue &SOffset,
14520774ea26SMatt Arsenault SDValue &Offset) const {
1453690f5b7aSSebastian Neubauer const SIRegisterInfo *TRI =
1454690f5b7aSSebastian Neubauer static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
14550774ea26SMatt Arsenault MachineFunction &MF = CurDAG->getMachineFunction();
14560774ea26SMatt Arsenault const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1457690f5b7aSSebastian Neubauer SDLoc DL(Addr);
1458690f5b7aSSebastian Neubauer
1459690f5b7aSSebastian Neubauer // CopyFromReg <sgpr>
1460690f5b7aSSebastian Neubauer if (IsCopyFromSGPR(*TRI, Addr)) {
1461690f5b7aSSebastian Neubauer SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1462690f5b7aSSebastian Neubauer SOffset = Addr;
1463690f5b7aSSebastian Neubauer Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1464690f5b7aSSebastian Neubauer return true;
1465690f5b7aSSebastian Neubauer }
1466690f5b7aSSebastian Neubauer
1467690f5b7aSSebastian Neubauer ConstantSDNode *CAddr;
1468690f5b7aSSebastian Neubauer if (Addr.getOpcode() == ISD::ADD) {
1469690f5b7aSSebastian Neubauer // Add (CopyFromReg <sgpr>) <constant>
1470690f5b7aSSebastian Neubauer CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
1471690f5b7aSSebastian Neubauer if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1472690f5b7aSSebastian Neubauer return false;
1473690f5b7aSSebastian Neubauer if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
1474690f5b7aSSebastian Neubauer return false;
1475690f5b7aSSebastian Neubauer
1476690f5b7aSSebastian Neubauer SOffset = Addr.getOperand(0);
1477690f5b7aSSebastian Neubauer } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
1478690f5b7aSSebastian Neubauer SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
1479690f5b7aSSebastian Neubauer // <constant>
1480690f5b7aSSebastian Neubauer SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1481690f5b7aSSebastian Neubauer } else {
1482690f5b7aSSebastian Neubauer return false;
1483690f5b7aSSebastian Neubauer }
14840774ea26SMatt Arsenault
14850774ea26SMatt Arsenault SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1486156d3ae0SMatt Arsenault
14870774ea26SMatt Arsenault Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
14880774ea26SMatt Arsenault return true;
14890774ea26SMatt Arsenault }
14900774ea26SMatt Arsenault
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const149145bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1492edd6da10SStanislav Mekhanoshin SDValue &SOffset, SDValue &Offset
1493edd6da10SStanislav Mekhanoshin ) const {
149445bb48eaSTom Stellard SDValue Ptr, VAddr, Offen, Idxen, Addr64;
149545bb48eaSTom Stellard const SIInstrInfo *TII =
149645bb48eaSTom Stellard static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
149745bb48eaSTom Stellard
1498edd6da10SStanislav Mekhanoshin if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1499b41574a9SChangpeng Fang return false;
150045bb48eaSTom Stellard
150145bb48eaSTom Stellard if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
150245bb48eaSTom Stellard !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
150345bb48eaSTom Stellard !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
150445bb48eaSTom Stellard uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1505735f4671SChris Lattner APInt::getAllOnes(32).getZExtValue(); // Size
150645bb48eaSTom Stellard SDLoc DL(Addr);
150745bb48eaSTom Stellard
150845bb48eaSTom Stellard const SITargetLowering& Lowering =
150945bb48eaSTom Stellard *static_cast<const SITargetLowering*>(getTargetLowering());
151045bb48eaSTom Stellard
151145bb48eaSTom Stellard SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
151245bb48eaSTom Stellard return true;
151345bb48eaSTom Stellard }
151445bb48eaSTom Stellard return false;
151545bb48eaSTom Stellard }
151645bb48eaSTom Stellard
15174227c62bSMatt Arsenault // Find a load or store from corresponding pattern root.
15184227c62bSMatt Arsenault // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)15194227c62bSMatt Arsenault static MemSDNode* findMemSDNode(SDNode *N) {
15204227c62bSMatt Arsenault N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
15214227c62bSMatt Arsenault if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
15224227c62bSMatt Arsenault return MN;
15234227c62bSMatt Arsenault assert(isa<BuildVectorSDNode>(N));
15244227c62bSMatt Arsenault for (SDValue V : N->op_values())
15254227c62bSMatt Arsenault if (MemSDNode *MN =
15264227c62bSMatt Arsenault dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
15274227c62bSMatt Arsenault return MN;
15284227c62bSMatt Arsenault llvm_unreachable("cannot find MemSDNode in the pattern!");
15294227c62bSMatt Arsenault }
15304227c62bSMatt Arsenault
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const1531cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1532cc7add52SSebastian Neubauer SDValue &VAddr, SDValue &Offset,
1533cc7add52SSebastian Neubauer uint64_t FlatVariant) const {
15344227c62bSMatt Arsenault int64_t OffsetVal = 0;
15354227c62bSMatt Arsenault
1536038d884aSStanislav Mekhanoshin unsigned AS = findMemSDNode(N)->getAddressSpace();
1537038d884aSStanislav Mekhanoshin
1538cc7add52SSebastian Neubauer bool CanHaveFlatSegmentOffsetBug =
1539cc7add52SSebastian Neubauer Subtarget->hasFlatSegmentOffsetBug() &&
1540cc7add52SSebastian Neubauer FlatVariant == SIInstrFlags::FLAT &&
1541cc7add52SSebastian Neubauer (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
1542cc7add52SSebastian Neubauer
1543cc7add52SSebastian Neubauer if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1544b1360caaSMichael Liao SDValue N0, N1;
15450fd6a04bSMatt Arsenault if (isBaseWithConstantOffset64(Addr, N0, N1)) {
154613c03162SSebastian Neubauer int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
15474227c62bSMatt Arsenault
15484227c62bSMatt Arsenault const SIInstrInfo *TII = Subtarget->getInstrInfo();
1549cc7add52SSebastian Neubauer if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
15504227c62bSMatt Arsenault Addr = N0;
15514227c62bSMatt Arsenault OffsetVal = COffsetVal;
15527cd57dcdSMatt Arsenault } else {
15537cd57dcdSMatt Arsenault // If the offset doesn't fit, put the low bits into the offset field and
15547cd57dcdSMatt Arsenault // add the rest.
1555760af7a0SJay Foad //
1556760af7a0SJay Foad // For a FLAT instruction the hardware decides whether to access
1557760af7a0SJay Foad // global/scratch/shared memory based on the high bits of vaddr,
1558760af7a0SJay Foad // ignoring the offset field, so we have to ensure that when we add
1559760af7a0SJay Foad // remainder to vaddr it still points into the same underlying object.
1560760af7a0SJay Foad // The easiest way to do that is to make sure that we split the offset
1561760af7a0SJay Foad // into two pieces that are both >= 0 or both <= 0.
15627cd57dcdSMatt Arsenault
15637cd57dcdSMatt Arsenault SDLoc DL(N);
1564e722943eSMatt Arsenault uint64_t RemainderOffset;
15657cd57dcdSMatt Arsenault
1566cc7add52SSebastian Neubauer std::tie(OffsetVal, RemainderOffset) =
1567cc7add52SSebastian Neubauer TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
15687cd57dcdSMatt Arsenault
1569038d884aSStanislav Mekhanoshin SDValue AddOffsetLo =
1570038d884aSStanislav Mekhanoshin getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1571038d884aSStanislav Mekhanoshin SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1572038d884aSStanislav Mekhanoshin
1573038d884aSStanislav Mekhanoshin if (Addr.getValueType().getSizeInBits() == 32) {
1574038d884aSStanislav Mekhanoshin SmallVector<SDValue, 3> Opnds;
1575038d884aSStanislav Mekhanoshin Opnds.push_back(N0);
1576038d884aSStanislav Mekhanoshin Opnds.push_back(AddOffsetLo);
1577038d884aSStanislav Mekhanoshin unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1578038d884aSStanislav Mekhanoshin if (Subtarget->hasAddNoCarry()) {
1579038d884aSStanislav Mekhanoshin AddOp = AMDGPU::V_ADD_U32_e64;
1580038d884aSStanislav Mekhanoshin Opnds.push_back(Clamp);
1581038d884aSStanislav Mekhanoshin }
1582038d884aSStanislav Mekhanoshin Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1583038d884aSStanislav Mekhanoshin } else {
1584b1360caaSMichael Liao // TODO: Should this try to use a scalar add pseudo if the base address
1585b1360caaSMichael Liao // is uniform and saddr is usable?
15867cd57dcdSMatt Arsenault SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
15877cd57dcdSMatt Arsenault SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
15887cd57dcdSMatt Arsenault
1589038d884aSStanislav Mekhanoshin SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1590038d884aSStanislav Mekhanoshin DL, MVT::i32, N0, Sub0);
1591038d884aSStanislav Mekhanoshin SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1592038d884aSStanislav Mekhanoshin DL, MVT::i32, N0, Sub1);
15937cd57dcdSMatt Arsenault
1594b1360caaSMichael Liao SDValue AddOffsetHi =
1595b1360caaSMichael Liao getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
15967cd57dcdSMatt Arsenault
15977cd57dcdSMatt Arsenault SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
15987cd57dcdSMatt Arsenault
1599b1360caaSMichael Liao SDNode *Add =
160079f67caeSMatt Arsenault CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
16017cd57dcdSMatt Arsenault {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
16027cd57dcdSMatt Arsenault
16037cd57dcdSMatt Arsenault SDNode *Addc = CurDAG->getMachineNode(
16047cd57dcdSMatt Arsenault AMDGPU::V_ADDC_U32_e64, DL, VTs,
16057cd57dcdSMatt Arsenault {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
16067cd57dcdSMatt Arsenault
16077cd57dcdSMatt Arsenault SDValue RegSequenceArgs[] = {
16087cd57dcdSMatt Arsenault CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1609b1360caaSMichael Liao SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
16107cd57dcdSMatt Arsenault
16117cd57dcdSMatt Arsenault Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1612b1360caaSMichael Liao MVT::i64, RegSequenceArgs),
1613b1360caaSMichael Liao 0);
1614b1360caaSMichael Liao }
16154227c62bSMatt Arsenault }
16164227c62bSMatt Arsenault }
1617038d884aSStanislav Mekhanoshin }
16184227c62bSMatt Arsenault
16194227c62bSMatt Arsenault VAddr = Addr;
16204227c62bSMatt Arsenault Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
16214227c62bSMatt Arsenault return true;
1622db7c6a87SMatt Arsenault }
1623db7c6a87SMatt Arsenault
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1624cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1625cc7add52SSebastian Neubauer SDValue &VAddr,
1626cc7add52SSebastian Neubauer SDValue &Offset) const {
1627cc7add52SSebastian Neubauer return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
1628cc7add52SSebastian Neubauer }
1629cc7add52SSebastian Neubauer
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1630cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1631cc7add52SSebastian Neubauer SDValue &VAddr,
1632cc7add52SSebastian Neubauer SDValue &Offset) const {
1633cc7add52SSebastian Neubauer return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
1634cc7add52SSebastian Neubauer }
1635cc7add52SSebastian Neubauer
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const1636cc7add52SSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1637cc7add52SSebastian Neubauer SDValue &VAddr,
1638cc7add52SSebastian Neubauer SDValue &Offset) const {
1639cc7add52SSebastian Neubauer return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1640cc7add52SSebastian Neubauer SIInstrFlags::FlatScratch);
1641cc7add52SSebastian Neubauer }
1642cc7add52SSebastian Neubauer
1643e1a2f471SMatt Arsenault // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)1644e1a2f471SMatt Arsenault static SDValue matchZExtFromI32(SDValue Op) {
1645e1a2f471SMatt Arsenault if (Op.getOpcode() != ISD::ZERO_EXTEND)
1646e1a2f471SMatt Arsenault return SDValue();
1647e1a2f471SMatt Arsenault
1648e1a2f471SMatt Arsenault SDValue ExtSrc = Op.getOperand(0);
1649e1a2f471SMatt Arsenault return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
1650e1a2f471SMatt Arsenault }
1651e1a2f471SMatt Arsenault
1652e1a2f471SMatt Arsenault // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const1653e1a2f471SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
1654e1a2f471SMatt Arsenault SDValue Addr,
1655e1a2f471SMatt Arsenault SDValue &SAddr,
1656e1a2f471SMatt Arsenault SDValue &VOffset,
1657e1a2f471SMatt Arsenault SDValue &Offset) const {
1658e1a2f471SMatt Arsenault int64_t ImmOffset = 0;
1659e1a2f471SMatt Arsenault
1660e1a2f471SMatt Arsenault // Match the immediate offset first, which canonically is moved as low as
1661e1a2f471SMatt Arsenault // possible.
1662e1a2f471SMatt Arsenault
1663d2e52eecSMatt Arsenault SDValue LHS, RHS;
1664d2e52eecSMatt Arsenault if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1665e1a2f471SMatt Arsenault int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1666e1a2f471SMatt Arsenault const SIInstrInfo *TII = Subtarget->getInstrInfo();
1667e1a2f471SMatt Arsenault
1668cc7add52SSebastian Neubauer if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
1669cc7add52SSebastian Neubauer SIInstrFlags::FlatGlobal)) {
1670e1a2f471SMatt Arsenault Addr = LHS;
1671e1a2f471SMatt Arsenault ImmOffset = COffsetVal;
1672909a5ccfSStanislav Mekhanoshin } else if (!LHS->isDivergent()) {
1673909a5ccfSStanislav Mekhanoshin if (COffsetVal > 0) {
1674a6e353b1SMatt Arsenault SDLoc SL(N);
1675909a5ccfSStanislav Mekhanoshin // saddr + large_offset -> saddr +
1676909a5ccfSStanislav Mekhanoshin // (voffset = large_offset & ~MaxOffset) +
1677a6e353b1SMatt Arsenault // (large_offset & MaxOffset);
1678a6e353b1SMatt Arsenault int64_t SplitImmOffset, RemainderOffset;
1679cc7add52SSebastian Neubauer std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
1680cc7add52SSebastian Neubauer COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
1681a6e353b1SMatt Arsenault
1682a6e353b1SMatt Arsenault if (isUInt<32>(RemainderOffset)) {
1683a6e353b1SMatt Arsenault SDNode *VMov = CurDAG->getMachineNode(
1684a6e353b1SMatt Arsenault AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1685a6e353b1SMatt Arsenault CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1686a6e353b1SMatt Arsenault VOffset = SDValue(VMov, 0);
1687a6e353b1SMatt Arsenault SAddr = LHS;
1688a6e353b1SMatt Arsenault Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1689a6e353b1SMatt Arsenault return true;
1690a6e353b1SMatt Arsenault }
1691e1a2f471SMatt Arsenault }
1692909a5ccfSStanislav Mekhanoshin
1693909a5ccfSStanislav Mekhanoshin // We are adding a 64 bit SGPR and a constant. If constant bus limit
1694909a5ccfSStanislav Mekhanoshin // is 1 we would need to perform 1 or 2 extra moves for each half of
1695909a5ccfSStanislav Mekhanoshin // the constant and it is better to do a scalar add and then issue a
1696909a5ccfSStanislav Mekhanoshin // single VALU instruction to materialize zero. Otherwise it is less
1697909a5ccfSStanislav Mekhanoshin // instructions to perform VALU adds with immediates or inline literals.
1698909a5ccfSStanislav Mekhanoshin unsigned NumLiterals =
1699909a5ccfSStanislav Mekhanoshin !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
1700909a5ccfSStanislav Mekhanoshin !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
1701909a5ccfSStanislav Mekhanoshin if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
1702909a5ccfSStanislav Mekhanoshin return false;
1703909a5ccfSStanislav Mekhanoshin }
1704e1a2f471SMatt Arsenault }
1705e1a2f471SMatt Arsenault
1706e1a2f471SMatt Arsenault // Match the variable offset.
1707909a5ccfSStanislav Mekhanoshin if (Addr.getOpcode() == ISD::ADD) {
1708d2e52eecSMatt Arsenault LHS = Addr.getOperand(0);
1709d2e52eecSMatt Arsenault RHS = Addr.getOperand(1);
1710e1a2f471SMatt Arsenault
1711e1a2f471SMatt Arsenault if (!LHS->isDivergent()) {
1712e1a2f471SMatt Arsenault // add (i64 sgpr), (zero_extend (i32 vgpr))
1713e1a2f471SMatt Arsenault if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
1714e1a2f471SMatt Arsenault SAddr = LHS;
1715e1a2f471SMatt Arsenault VOffset = ZextRHS;
1716e1a2f471SMatt Arsenault }
1717e1a2f471SMatt Arsenault }
1718e1a2f471SMatt Arsenault
1719e1a2f471SMatt Arsenault if (!SAddr && !RHS->isDivergent()) {
1720e1a2f471SMatt Arsenault // add (zero_extend (i32 vgpr)), (i64 sgpr)
1721e1a2f471SMatt Arsenault if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
1722e1a2f471SMatt Arsenault SAddr = RHS;
1723e1a2f471SMatt Arsenault VOffset = ZextLHS;
1724e1a2f471SMatt Arsenault }
1725e1a2f471SMatt Arsenault }
1726e1a2f471SMatt Arsenault
1727909a5ccfSStanislav Mekhanoshin if (SAddr) {
1728909a5ccfSStanislav Mekhanoshin Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1729909a5ccfSStanislav Mekhanoshin return true;
1730909a5ccfSStanislav Mekhanoshin }
1731909a5ccfSStanislav Mekhanoshin }
1732909a5ccfSStanislav Mekhanoshin
1733909a5ccfSStanislav Mekhanoshin if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
1734909a5ccfSStanislav Mekhanoshin isa<ConstantSDNode>(Addr))
1735e1a2f471SMatt Arsenault return false;
1736e1a2f471SMatt Arsenault
1737909a5ccfSStanislav Mekhanoshin // It's cheaper to materialize a single 32-bit zero for vaddr than the two
1738909a5ccfSStanislav Mekhanoshin // moves required to copy a 64-bit SGPR to VGPR.
1739909a5ccfSStanislav Mekhanoshin SAddr = Addr;
1740909a5ccfSStanislav Mekhanoshin SDNode *VMov =
1741909a5ccfSStanislav Mekhanoshin CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
1742909a5ccfSStanislav Mekhanoshin CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
1743909a5ccfSStanislav Mekhanoshin VOffset = SDValue(VMov, 0);
1744e1a2f471SMatt Arsenault Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1745e1a2f471SMatt Arsenault return true;
1746e1a2f471SMatt Arsenault }
1747e1a2f471SMatt Arsenault
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)1748b7ebb25eSStanislav Mekhanoshin static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
1749b7ebb25eSStanislav Mekhanoshin if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1750b7ebb25eSStanislav Mekhanoshin SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
1751b7ebb25eSStanislav Mekhanoshin } else if (SAddr.getOpcode() == ISD::ADD &&
1752b7ebb25eSStanislav Mekhanoshin isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
1753b7ebb25eSStanislav Mekhanoshin // Materialize this into a scalar move for scalar address to avoid
1754b7ebb25eSStanislav Mekhanoshin // readfirstlane.
1755b7ebb25eSStanislav Mekhanoshin auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
1756b7ebb25eSStanislav Mekhanoshin SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1757b7ebb25eSStanislav Mekhanoshin FI->getValueType(0));
175896e1fcb1SSebastian Neubauer SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
1759b7ebb25eSStanislav Mekhanoshin MVT::i32, TFI, SAddr.getOperand(1)),
1760b7ebb25eSStanislav Mekhanoshin 0);
1761b7ebb25eSStanislav Mekhanoshin }
1762b7ebb25eSStanislav Mekhanoshin
1763b7ebb25eSStanislav Mekhanoshin return SAddr;
1764b7ebb25eSStanislav Mekhanoshin }
1765b7ebb25eSStanislav Mekhanoshin
1766038d884aSStanislav Mekhanoshin // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const1767690f5b7aSSebastian Neubauer bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
1768038d884aSStanislav Mekhanoshin SDValue &SAddr,
1769038d884aSStanislav Mekhanoshin SDValue &Offset) const {
1770038d884aSStanislav Mekhanoshin if (Addr->isDivergent())
1771038d884aSStanislav Mekhanoshin return false;
1772038d884aSStanislav Mekhanoshin
1773690f5b7aSSebastian Neubauer SDLoc DL(Addr);
1774690f5b7aSSebastian Neubauer
1775038d884aSStanislav Mekhanoshin int64_t COffsetVal = 0;
1776038d884aSStanislav Mekhanoshin
1777038d884aSStanislav Mekhanoshin if (CurDAG->isBaseWithConstantOffset(Addr)) {
1778038d884aSStanislav Mekhanoshin COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
1779038d884aSStanislav Mekhanoshin SAddr = Addr.getOperand(0);
1780690f5b7aSSebastian Neubauer } else {
1781690f5b7aSSebastian Neubauer SAddr = Addr;
1782038d884aSStanislav Mekhanoshin }
1783038d884aSStanislav Mekhanoshin
1784b7ebb25eSStanislav Mekhanoshin SAddr = SelectSAddrFI(CurDAG, SAddr);
1785038d884aSStanislav Mekhanoshin
1786038d884aSStanislav Mekhanoshin const SIInstrInfo *TII = Subtarget->getInstrInfo();
1787038d884aSStanislav Mekhanoshin
1788cc7add52SSebastian Neubauer if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
1789cc7add52SSebastian Neubauer SIInstrFlags::FlatScratch)) {
179013c03162SSebastian Neubauer int64_t SplitImmOffset, RemainderOffset;
179113c03162SSebastian Neubauer std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
179213c03162SSebastian Neubauer COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
1793038d884aSStanislav Mekhanoshin
179413c03162SSebastian Neubauer COffsetVal = SplitImmOffset;
1795038d884aSStanislav Mekhanoshin
1796038d884aSStanislav Mekhanoshin SDValue AddOffset =
1797690f5b7aSSebastian Neubauer SAddr.getOpcode() == ISD::TargetFrameIndex
1798690f5b7aSSebastian Neubauer ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
1799690f5b7aSSebastian Neubauer : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
180096e1fcb1SSebastian Neubauer SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
180196e1fcb1SSebastian Neubauer SAddr, AddOffset),
180296e1fcb1SSebastian Neubauer 0);
1803038d884aSStanislav Mekhanoshin }
1804038d884aSStanislav Mekhanoshin
1805690f5b7aSSebastian Neubauer Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
1806038d884aSStanislav Mekhanoshin
1807038d884aSStanislav Mekhanoshin return true;
1808038d884aSStanislav Mekhanoshin }
1809038d884aSStanislav Mekhanoshin
18107b9f620eSJay Foad // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const18117b9f620eSJay Foad bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
18127b9f620eSJay Foad SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
18137b9f620eSJay Foad if (!Subtarget->hasFlatScratchSVSSwizzleBug())
18147b9f620eSJay Foad return false;
18157b9f620eSJay Foad
18167b9f620eSJay Foad // The bug affects the swizzling of SVS accesses if there is any carry out
18177b9f620eSJay Foad // from the two low order bits (i.e. from bit 1 into bit 2) when adding
18187b9f620eSJay Foad // voffset to (soffset + inst_offset).
18197b9f620eSJay Foad KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
18207b9f620eSJay Foad KnownBits SKnown = KnownBits::computeForAddSub(
18217b9f620eSJay Foad true, false, CurDAG->computeKnownBits(SAddr),
18227b9f620eSJay Foad KnownBits::makeConstant(APInt(32, ImmOffset)));
18237b9f620eSJay Foad uint64_t VMax = VKnown.getMaxValue().getZExtValue();
18247b9f620eSJay Foad uint64_t SMax = SKnown.getMaxValue().getZExtValue();
18257b9f620eSJay Foad return (VMax & 3) + (SMax & 3) >= 4;
18267b9f620eSJay Foad }
18277b9f620eSJay Foad
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const182836fe3f13SStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
182936fe3f13SStanislav Mekhanoshin SDValue &VAddr, SDValue &SAddr,
183036fe3f13SStanislav Mekhanoshin SDValue &Offset) const {
183136fe3f13SStanislav Mekhanoshin int64_t ImmOffset = 0;
183236fe3f13SStanislav Mekhanoshin
183336fe3f13SStanislav Mekhanoshin SDValue LHS, RHS;
183436fe3f13SStanislav Mekhanoshin if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
183536fe3f13SStanislav Mekhanoshin int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
183636fe3f13SStanislav Mekhanoshin const SIInstrInfo *TII = Subtarget->getInstrInfo();
183736fe3f13SStanislav Mekhanoshin
183836fe3f13SStanislav Mekhanoshin if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
183936fe3f13SStanislav Mekhanoshin Addr = LHS;
184036fe3f13SStanislav Mekhanoshin ImmOffset = COffsetVal;
184136fe3f13SStanislav Mekhanoshin } else if (!LHS->isDivergent() && COffsetVal > 0) {
184236fe3f13SStanislav Mekhanoshin SDLoc SL(N);
184336fe3f13SStanislav Mekhanoshin // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
184436fe3f13SStanislav Mekhanoshin // (large_offset & MaxOffset);
184536fe3f13SStanislav Mekhanoshin int64_t SplitImmOffset, RemainderOffset;
184636fe3f13SStanislav Mekhanoshin std::tie(SplitImmOffset, RemainderOffset)
184736fe3f13SStanislav Mekhanoshin = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
184836fe3f13SStanislav Mekhanoshin
184936fe3f13SStanislav Mekhanoshin if (isUInt<32>(RemainderOffset)) {
185036fe3f13SStanislav Mekhanoshin SDNode *VMov = CurDAG->getMachineNode(
185136fe3f13SStanislav Mekhanoshin AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
185236fe3f13SStanislav Mekhanoshin CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
185336fe3f13SStanislav Mekhanoshin VAddr = SDValue(VMov, 0);
185436fe3f13SStanislav Mekhanoshin SAddr = LHS;
18557b9f620eSJay Foad if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
18567b9f620eSJay Foad return false;
185736fe3f13SStanislav Mekhanoshin Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
185836fe3f13SStanislav Mekhanoshin return true;
185936fe3f13SStanislav Mekhanoshin }
186036fe3f13SStanislav Mekhanoshin }
186136fe3f13SStanislav Mekhanoshin }
186236fe3f13SStanislav Mekhanoshin
186336fe3f13SStanislav Mekhanoshin if (Addr.getOpcode() != ISD::ADD)
186436fe3f13SStanislav Mekhanoshin return false;
186536fe3f13SStanislav Mekhanoshin
186636fe3f13SStanislav Mekhanoshin LHS = Addr.getOperand(0);
186736fe3f13SStanislav Mekhanoshin RHS = Addr.getOperand(1);
186836fe3f13SStanislav Mekhanoshin
186936fe3f13SStanislav Mekhanoshin if (!LHS->isDivergent() && RHS->isDivergent()) {
187036fe3f13SStanislav Mekhanoshin SAddr = LHS;
187136fe3f13SStanislav Mekhanoshin VAddr = RHS;
187236fe3f13SStanislav Mekhanoshin } else if (!RHS->isDivergent() && LHS->isDivergent()) {
187336fe3f13SStanislav Mekhanoshin SAddr = RHS;
187436fe3f13SStanislav Mekhanoshin VAddr = LHS;
187536fe3f13SStanislav Mekhanoshin } else {
187636fe3f13SStanislav Mekhanoshin return false;
187736fe3f13SStanislav Mekhanoshin }
187836fe3f13SStanislav Mekhanoshin
18797b9f620eSJay Foad if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
18807b9f620eSJay Foad return false;
188136fe3f13SStanislav Mekhanoshin SAddr = SelectSAddrFI(CurDAG, SAddr);
188236fe3f13SStanislav Mekhanoshin Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
188336fe3f13SStanislav Mekhanoshin return true;
188436fe3f13SStanislav Mekhanoshin }
188536fe3f13SStanislav Mekhanoshin
18864696a33dSIvan Kosarev // Match an immediate (if Imm is true) or an SGPR (if Imm is false)
18874696a33dSIvan Kosarev // offset. If Imm32Only is true, match only 32-bit immediate offsets
18884696a33dSIvan Kosarev // available on CI.
SelectSMRDOffset(SDValue Addr,SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const1889432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
1890432cbd78SIvan Kosarev SDValue *SOffset, SDValue *Offset,
18914696a33dSIvan Kosarev bool Imm32Only) const {
1892dee26a28STom Stellard ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
189377ce2e21SJakub Kuderski if (!C) {
1894432cbd78SIvan Kosarev if (!SOffset)
18954696a33dSIvan Kosarev return false;
189677ce2e21SJakub Kuderski if (ByteOffsetNode.getValueType().isScalarInteger() &&
189777ce2e21SJakub Kuderski ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1898432cbd78SIvan Kosarev *SOffset = ByteOffsetNode;
189977ce2e21SJakub Kuderski return true;
190077ce2e21SJakub Kuderski }
190177ce2e21SJakub Kuderski if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
190277ce2e21SJakub Kuderski if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1903432cbd78SIvan Kosarev *SOffset = ByteOffsetNode.getOperand(0);
190477ce2e21SJakub Kuderski return true;
190577ce2e21SJakub Kuderski }
190677ce2e21SJakub Kuderski }
1907dee26a28STom Stellard return false;
190877ce2e21SJakub Kuderski }
1909dee26a28STom Stellard
1910dee26a28STom Stellard SDLoc SL(ByteOffsetNode);
191130f18ed3SAustin Kerbow // GFX9 and GFX10 have signed byte immediate offsets.
191230f18ed3SAustin Kerbow int64_t ByteOffset = C->getSExtValue();
19130426c2d0SMatt Arsenault Optional<int64_t> EncodedOffset =
191430f18ed3SAustin Kerbow AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
1915432cbd78SIvan Kosarev if (EncodedOffset && Offset && !Imm32Only) {
1916432cbd78SIvan Kosarev *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1917dee26a28STom Stellard return true;
1918dee26a28STom Stellard }
1919dee26a28STom Stellard
192030f18ed3SAustin Kerbow // SGPR and literal offsets are unsigned.
192130f18ed3SAustin Kerbow if (ByteOffset < 0)
192230f18ed3SAustin Kerbow return false;
192330f18ed3SAustin Kerbow
192430f18ed3SAustin Kerbow EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1925432cbd78SIvan Kosarev if (EncodedOffset && Offset && Imm32Only) {
1926432cbd78SIvan Kosarev *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
19270426c2d0SMatt Arsenault return true;
19280426c2d0SMatt Arsenault }
19290426c2d0SMatt Arsenault
19300426c2d0SMatt Arsenault if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1931217361c3STom Stellard return false;
1932217361c3STom Stellard
1933432cbd78SIvan Kosarev if (SOffset) {
1934dee26a28STom Stellard SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1935432cbd78SIvan Kosarev *SOffset = SDValue(
19360426c2d0SMatt Arsenault CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
1937dee26a28STom Stellard return true;
1938dee26a28STom Stellard }
1939dee26a28STom Stellard
19404696a33dSIvan Kosarev return false;
19414696a33dSIvan Kosarev }
19424696a33dSIvan Kosarev
Expand32BitAddress(SDValue Addr) const1943923712b6SMatt Arsenault SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
1944923712b6SMatt Arsenault if (Addr.getValueType() != MVT::i32)
1945923712b6SMatt Arsenault return Addr;
1946923712b6SMatt Arsenault
1947923712b6SMatt Arsenault // Zero-extend a 32-bit address.
1948923712b6SMatt Arsenault SDLoc SL(Addr);
1949923712b6SMatt Arsenault
1950923712b6SMatt Arsenault const MachineFunction &MF = CurDAG->getMachineFunction();
1951923712b6SMatt Arsenault const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1952923712b6SMatt Arsenault unsigned AddrHiVal = Info->get32BitAddressHighBits();
1953923712b6SMatt Arsenault SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
1954923712b6SMatt Arsenault
1955923712b6SMatt Arsenault const SDValue Ops[] = {
1956923712b6SMatt Arsenault CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
1957923712b6SMatt Arsenault Addr,
1958923712b6SMatt Arsenault CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
1959923712b6SMatt Arsenault SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
1960923712b6SMatt Arsenault 0),
1961923712b6SMatt Arsenault CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
1962923712b6SMatt Arsenault };
1963923712b6SMatt Arsenault
1964923712b6SMatt Arsenault return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
1965923712b6SMatt Arsenault Ops), 0);
1966923712b6SMatt Arsenault }
1967923712b6SMatt Arsenault
19684696a33dSIvan Kosarev // Match a base and an immediate (if Imm is true) or an SGPR
19694696a33dSIvan Kosarev // (if Imm is false) offset. If Imm32Only is true, match only 32-bit
19704696a33dSIvan Kosarev // immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const1971432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
1972432cbd78SIvan Kosarev SDValue *SOffset, SDValue *Offset,
19734696a33dSIvan Kosarev bool Imm32Only) const {
1974dee26a28STom Stellard SDLoc SL(Addr);
1975923712b6SMatt Arsenault
1976432cbd78SIvan Kosarev if (SOffset && Offset) {
1977432cbd78SIvan Kosarev assert(!Imm32Only);
1978432cbd78SIvan Kosarev SDValue B;
1979432cbd78SIvan Kosarev return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
1980432cbd78SIvan Kosarev SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
1981432cbd78SIvan Kosarev }
1982432cbd78SIvan Kosarev
19833fc2079cSMarek Olsak // A 32-bit (address + offset) should not cause unsigned 32-bit integer
19843fc2079cSMarek Olsak // wraparound, because s_load instructions perform the addition in 64 bits.
19853fc2079cSMarek Olsak if ((Addr.getValueType() != MVT::i32 ||
1986b1360caaSMichael Liao Addr->getFlags().hasNoUnsignedWrap())) {
1987b1360caaSMichael Liao SDValue N0, N1;
1988b1360caaSMichael Liao // Extract the base and offset if possible.
1989b1360caaSMichael Liao if (CurDAG->isBaseWithConstantOffset(Addr) ||
1990b1360caaSMichael Liao Addr.getOpcode() == ISD::ADD) {
1991b1360caaSMichael Liao N0 = Addr.getOperand(0);
1992b1360caaSMichael Liao N1 = Addr.getOperand(1);
1993b1360caaSMichael Liao } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1994b1360caaSMichael Liao assert(N0 && N1 && isa<ConstantSDNode>(N1));
1995b1360caaSMichael Liao }
1996b1360caaSMichael Liao if (N0 && N1) {
1997432cbd78SIvan Kosarev if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
1998432cbd78SIvan Kosarev SBase = N0;
1999dee26a28STom Stellard return true;
2000dee26a28STom Stellard }
2001432cbd78SIvan Kosarev if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
2002432cbd78SIvan Kosarev SBase = N1;
20039c66c02eSIvan Kosarev return true;
20049c66c02eSIvan Kosarev }
2005dee26a28STom Stellard }
20064696a33dSIvan Kosarev return false;
2007b1360caaSMichael Liao }
2008432cbd78SIvan Kosarev if (Offset && !SOffset) {
2009432cbd78SIvan Kosarev SBase = Addr;
2010432cbd78SIvan Kosarev *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
2011432cbd78SIvan Kosarev return true;
2012432cbd78SIvan Kosarev }
20134696a33dSIvan Kosarev return false;
2014432cbd78SIvan Kosarev }
2015432cbd78SIvan Kosarev
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2016432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2017432cbd78SIvan Kosarev SDValue *SOffset, SDValue *Offset,
2018432cbd78SIvan Kosarev bool Imm32Only) const {
2019432cbd78SIvan Kosarev if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
2020432cbd78SIvan Kosarev return false;
2021432cbd78SIvan Kosarev SBase = Expand32BitAddress(SBase);
2022dee26a28STom Stellard return true;
2023dee26a28STom Stellard }
2024dee26a28STom Stellard
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const2025dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2026dee26a28STom Stellard SDValue &Offset) const {
2027432cbd78SIvan Kosarev return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
20288973a0a2SMarek Olsak }
2029dee26a28STom Stellard
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const20308973a0a2SMarek Olsak bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
20318973a0a2SMarek Olsak SDValue &Offset) const {
2032f7521dc2SMatt Arsenault assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2033432cbd78SIvan Kosarev return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2034432cbd78SIvan Kosarev /* Imm32Only */ true);
2035217361c3STom Stellard }
2036217361c3STom Stellard
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const2037dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2038432cbd78SIvan Kosarev SDValue &SOffset) const {
2039432cbd78SIvan Kosarev return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2040432cbd78SIvan Kosarev }
2041432cbd78SIvan Kosarev
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2042432cbd78SIvan Kosarev bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2043432cbd78SIvan Kosarev SDValue &SOffset,
2044dee26a28STom Stellard SDValue &Offset) const {
2045432cbd78SIvan Kosarev return SelectSMRD(Addr, SBase, &SOffset, &Offset);
2046dee26a28STom Stellard }
2047dee26a28STom Stellard
SelectSMRDBufferImm(SDValue Addr,SDValue & Offset) const2048dee26a28STom Stellard bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
2049dee26a28STom Stellard SDValue &Offset) const {
205075fcdfa1SMatt Arsenault if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
205130f18ed3SAustin Kerbow // The immediate offset for S_BUFFER instructions is unsigned.
205230f18ed3SAustin Kerbow if (auto Imm =
205330f18ed3SAustin Kerbow AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) {
205475fcdfa1SMatt Arsenault Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
205575fcdfa1SMatt Arsenault return true;
205675fcdfa1SMatt Arsenault }
205775fcdfa1SMatt Arsenault }
205875fcdfa1SMatt Arsenault
205975fcdfa1SMatt Arsenault return false;
20608973a0a2SMarek Olsak }
2061dee26a28STom Stellard
SelectSMRDBufferImm32(SDValue Addr,SDValue & Offset) const20628973a0a2SMarek Olsak bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
20638973a0a2SMarek Olsak SDValue &Offset) const {
2064f7521dc2SMatt Arsenault assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
20658973a0a2SMarek Olsak
206675fcdfa1SMatt Arsenault if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) {
206775fcdfa1SMatt Arsenault if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget,
206875fcdfa1SMatt Arsenault C->getZExtValue())) {
206975fcdfa1SMatt Arsenault Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32);
207075fcdfa1SMatt Arsenault return true;
207175fcdfa1SMatt Arsenault }
207275fcdfa1SMatt Arsenault }
2073217361c3STom Stellard
207475fcdfa1SMatt Arsenault return false;
2075217361c3STom Stellard }
2076217361c3STom Stellard
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const20777968c345SNicolai Haehnle bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
20781322b6f8SMatt Arsenault SDValue &Base,
20797968c345SNicolai Haehnle SDValue &Offset) const {
20801322b6f8SMatt Arsenault SDLoc DL(Index);
20811322b6f8SMatt Arsenault
20821322b6f8SMatt Arsenault if (CurDAG->isBaseWithConstantOffset(Index)) {
20831322b6f8SMatt Arsenault SDValue N0 = Index.getOperand(0);
20841322b6f8SMatt Arsenault SDValue N1 = Index.getOperand(1);
20851322b6f8SMatt Arsenault ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
20861322b6f8SMatt Arsenault
20871322b6f8SMatt Arsenault // (add n0, c0)
20886f539294SChangpeng Fang // Don't peel off the offset (c0) if doing so could possibly lead
20896f539294SChangpeng Fang // the base (n0) to be negative.
2090591b029fSStanislav Mekhanoshin // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2091591b029fSStanislav Mekhanoshin if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2092591b029fSStanislav Mekhanoshin (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
20931322b6f8SMatt Arsenault Base = N0;
20941322b6f8SMatt Arsenault Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
20951322b6f8SMatt Arsenault return true;
20961322b6f8SMatt Arsenault }
20976f539294SChangpeng Fang }
20981322b6f8SMatt Arsenault
20991322b6f8SMatt Arsenault if (isa<ConstantSDNode>(Index))
21001322b6f8SMatt Arsenault return false;
21011322b6f8SMatt Arsenault
21021322b6f8SMatt Arsenault Base = Index;
21031322b6f8SMatt Arsenault Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
21041322b6f8SMatt Arsenault return true;
21051322b6f8SMatt Arsenault }
21061322b6f8SMatt Arsenault
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)21070a3d755eSalex-t SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2108bdc4956bSBenjamin Kramer SDValue Val, uint32_t Offset,
2109bdc4956bSBenjamin Kramer uint32_t Width) {
21100a3d755eSalex-t if (Val->isDivergent()) {
21110a3d755eSalex-t unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
21120a3d755eSalex-t SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
21130a3d755eSalex-t SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
21140a3d755eSalex-t
21150a3d755eSalex-t return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
21160a3d755eSalex-t }
21170a3d755eSalex-t unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
211845bb48eaSTom Stellard // Transformation function, pack the offset and width of a BFE into
211945bb48eaSTom Stellard // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
212045bb48eaSTom Stellard // source, bits [5:0] contain the offset and bits [22:16] the width.
212145bb48eaSTom Stellard uint32_t PackedVal = Offset | (Width << 16);
212245bb48eaSTom Stellard SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
212345bb48eaSTom Stellard
212445bb48eaSTom Stellard return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
212545bb48eaSTom Stellard }
212645bb48eaSTom Stellard
SelectS_BFEFromShifts(SDNode * N)212795927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
212845bb48eaSTom Stellard // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
212945bb48eaSTom Stellard // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
213045bb48eaSTom Stellard // Predicate: 0 < b <= c < 32
213145bb48eaSTom Stellard
213245bb48eaSTom Stellard const SDValue &Shl = N->getOperand(0);
213345bb48eaSTom Stellard ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
213445bb48eaSTom Stellard ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
213545bb48eaSTom Stellard
213645bb48eaSTom Stellard if (B && C) {
213745bb48eaSTom Stellard uint32_t BVal = B->getZExtValue();
213845bb48eaSTom Stellard uint32_t CVal = C->getZExtValue();
213945bb48eaSTom Stellard
214045bb48eaSTom Stellard if (0 < BVal && BVal <= CVal && CVal < 32) {
214145bb48eaSTom Stellard bool Signed = N->getOpcode() == ISD::SRA;
21420a3d755eSalex-t ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
214395927c0fSJustin Bogner 32 - CVal));
214495927c0fSJustin Bogner return;
214545bb48eaSTom Stellard }
214645bb48eaSTom Stellard }
214795927c0fSJustin Bogner SelectCode(N);
214845bb48eaSTom Stellard }
214945bb48eaSTom Stellard
SelectS_BFE(SDNode * N)215095927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
215145bb48eaSTom Stellard switch (N->getOpcode()) {
215245bb48eaSTom Stellard case ISD::AND:
215345bb48eaSTom Stellard if (N->getOperand(0).getOpcode() == ISD::SRL) {
215445bb48eaSTom Stellard // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
215545bb48eaSTom Stellard // Predicate: isMask(mask)
215645bb48eaSTom Stellard const SDValue &Srl = N->getOperand(0);
215745bb48eaSTom Stellard ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
215845bb48eaSTom Stellard ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
215945bb48eaSTom Stellard
216045bb48eaSTom Stellard if (Shift && Mask) {
216145bb48eaSTom Stellard uint32_t ShiftVal = Shift->getZExtValue();
216245bb48eaSTom Stellard uint32_t MaskVal = Mask->getZExtValue();
216345bb48eaSTom Stellard
216445bb48eaSTom Stellard if (isMask_32(MaskVal)) {
216545bb48eaSTom Stellard uint32_t WidthVal = countPopulation(MaskVal);
21660a3d755eSalex-t ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
21670a3d755eSalex-t WidthVal));
216895927c0fSJustin Bogner return;
216945bb48eaSTom Stellard }
217045bb48eaSTom Stellard }
217145bb48eaSTom Stellard }
217245bb48eaSTom Stellard break;
217345bb48eaSTom Stellard case ISD::SRL:
217445bb48eaSTom Stellard if (N->getOperand(0).getOpcode() == ISD::AND) {
217545bb48eaSTom Stellard // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
217645bb48eaSTom Stellard // Predicate: isMask(mask >> b)
217745bb48eaSTom Stellard const SDValue &And = N->getOperand(0);
217845bb48eaSTom Stellard ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
217945bb48eaSTom Stellard ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
218045bb48eaSTom Stellard
218145bb48eaSTom Stellard if (Shift && Mask) {
218245bb48eaSTom Stellard uint32_t ShiftVal = Shift->getZExtValue();
218345bb48eaSTom Stellard uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
218445bb48eaSTom Stellard
218545bb48eaSTom Stellard if (isMask_32(MaskVal)) {
218645bb48eaSTom Stellard uint32_t WidthVal = countPopulation(MaskVal);
21870a3d755eSalex-t ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
21880a3d755eSalex-t WidthVal));
218995927c0fSJustin Bogner return;
219045bb48eaSTom Stellard }
219145bb48eaSTom Stellard }
219295927c0fSJustin Bogner } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
219395927c0fSJustin Bogner SelectS_BFEFromShifts(N);
219495927c0fSJustin Bogner return;
219595927c0fSJustin Bogner }
219645bb48eaSTom Stellard break;
219745bb48eaSTom Stellard case ISD::SRA:
219895927c0fSJustin Bogner if (N->getOperand(0).getOpcode() == ISD::SHL) {
219995927c0fSJustin Bogner SelectS_BFEFromShifts(N);
220095927c0fSJustin Bogner return;
220195927c0fSJustin Bogner }
220245bb48eaSTom Stellard break;
22037e8de01fSMatt Arsenault
22047e8de01fSMatt Arsenault case ISD::SIGN_EXTEND_INREG: {
22057e8de01fSMatt Arsenault // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
22067e8de01fSMatt Arsenault SDValue Src = N->getOperand(0);
22077e8de01fSMatt Arsenault if (Src.getOpcode() != ISD::SRL)
22087e8de01fSMatt Arsenault break;
22097e8de01fSMatt Arsenault
22107e8de01fSMatt Arsenault const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
22117e8de01fSMatt Arsenault if (!Amt)
22127e8de01fSMatt Arsenault break;
22137e8de01fSMatt Arsenault
22147e8de01fSMatt Arsenault unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
22150a3d755eSalex-t ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
221695927c0fSJustin Bogner Amt->getZExtValue(), Width));
221795927c0fSJustin Bogner return;
22187e8de01fSMatt Arsenault }
221945bb48eaSTom Stellard }
222045bb48eaSTom Stellard
222195927c0fSJustin Bogner SelectCode(N);
222245bb48eaSTom Stellard }
222345bb48eaSTom Stellard
isCBranchSCC(const SDNode * N) const22247b1dc2c9SMatt Arsenault bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
22257b1dc2c9SMatt Arsenault assert(N->getOpcode() == ISD::BRCOND);
22267b1dc2c9SMatt Arsenault if (!N->hasOneUse())
22277b1dc2c9SMatt Arsenault return false;
22287b1dc2c9SMatt Arsenault
22297b1dc2c9SMatt Arsenault SDValue Cond = N->getOperand(1);
22307b1dc2c9SMatt Arsenault if (Cond.getOpcode() == ISD::CopyToReg)
22317b1dc2c9SMatt Arsenault Cond = Cond.getOperand(2);
22327b1dc2c9SMatt Arsenault
22337b1dc2c9SMatt Arsenault if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
22347b1dc2c9SMatt Arsenault return false;
22357b1dc2c9SMatt Arsenault
22367b1dc2c9SMatt Arsenault MVT VT = Cond.getOperand(0).getSimpleValueType();
22377b1dc2c9SMatt Arsenault if (VT == MVT::i32)
22387b1dc2c9SMatt Arsenault return true;
22397b1dc2c9SMatt Arsenault
22407b1dc2c9SMatt Arsenault if (VT == MVT::i64) {
22415bfbae5cSTom Stellard auto ST = static_cast<const GCNSubtarget *>(Subtarget);
22427b1dc2c9SMatt Arsenault
22437b1dc2c9SMatt Arsenault ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
22447b1dc2c9SMatt Arsenault return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
22457b1dc2c9SMatt Arsenault }
22467b1dc2c9SMatt Arsenault
22477b1dc2c9SMatt Arsenault return false;
22487b1dc2c9SMatt Arsenault }
22497b1dc2c9SMatt Arsenault
SelectBRCOND(SDNode * N)225095927c0fSJustin Bogner void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2251bc4497b1STom Stellard SDValue Cond = N->getOperand(1);
2252bc4497b1STom Stellard
2253327188aaSMatt Arsenault if (Cond.isUndef()) {
2254327188aaSMatt Arsenault CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2255327188aaSMatt Arsenault N->getOperand(2), N->getOperand(0));
2256327188aaSMatt Arsenault return;
2257327188aaSMatt Arsenault }
2258327188aaSMatt Arsenault
225952500216SStanislav Mekhanoshin const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
226052500216SStanislav Mekhanoshin const SIRegisterInfo *TRI = ST->getRegisterInfo();
226152500216SStanislav Mekhanoshin
2262d674e0acSMatt Arsenault bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2263d674e0acSMatt Arsenault unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2264178050c3SMatt Arsenault Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2265bc4497b1STom Stellard SDLoc SL(N);
2266bc4497b1STom Stellard
22676eaad1e5STim Renouf if (!UseSCCBr) {
22686eaad1e5STim Renouf // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
22696eaad1e5STim Renouf // analyzed what generates the vcc value, so we do not know whether vcc
22706eaad1e5STim Renouf // bits for disabled lanes are 0. Thus we need to mask out bits for
22716eaad1e5STim Renouf // disabled lanes.
22726eaad1e5STim Renouf //
22736eaad1e5STim Renouf // For the case that we select S_CBRANCH_SCC1 and it gets
22746eaad1e5STim Renouf // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
22756eaad1e5STim Renouf // SIInstrInfo::moveToVALU which inserts the S_AND).
22766eaad1e5STim Renouf //
22776eaad1e5STim Renouf // We could add an analysis of what generates the vcc value here and omit
22786eaad1e5STim Renouf // the S_AND when is unnecessary. But it would be better to add a separate
22796eaad1e5STim Renouf // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
22806eaad1e5STim Renouf // catches both cases.
228152500216SStanislav Mekhanoshin Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
228252500216SStanislav Mekhanoshin : AMDGPU::S_AND_B64,
228352500216SStanislav Mekhanoshin SL, MVT::i1,
228452500216SStanislav Mekhanoshin CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
228552500216SStanislav Mekhanoshin : AMDGPU::EXEC,
228652500216SStanislav Mekhanoshin MVT::i1),
22876eaad1e5STim Renouf Cond),
22886eaad1e5STim Renouf 0);
22896eaad1e5STim Renouf }
22906eaad1e5STim Renouf
2291d674e0acSMatt Arsenault SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2292d674e0acSMatt Arsenault CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2293bc4497b1STom Stellard N->getOperand(2), // Basic Block
2294f530e8b3SMatt Arsenault VCC.getValue(0));
2295bc4497b1STom Stellard }
2296bc4497b1STom Stellard
SelectFMAD_FMA(SDNode * N)22970084adc5SMatt Arsenault void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
2298d7e2303dSMatt Arsenault MVT VT = N->getSimpleValueType(0);
22990084adc5SMatt Arsenault bool IsFMA = N->getOpcode() == ISD::FMA;
23000084adc5SMatt Arsenault if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
23010084adc5SMatt Arsenault !Subtarget->hasFmaMixInsts()) ||
23020084adc5SMatt Arsenault ((IsFMA && Subtarget->hasMadMixInsts()) ||
23030084adc5SMatt Arsenault (!IsFMA && Subtarget->hasFmaMixInsts()))) {
2304d7e2303dSMatt Arsenault SelectCode(N);
2305d7e2303dSMatt Arsenault return;
2306d7e2303dSMatt Arsenault }
2307d7e2303dSMatt Arsenault
2308d7e2303dSMatt Arsenault SDValue Src0 = N->getOperand(0);
2309d7e2303dSMatt Arsenault SDValue Src1 = N->getOperand(1);
2310d7e2303dSMatt Arsenault SDValue Src2 = N->getOperand(2);
2311d7e2303dSMatt Arsenault unsigned Src0Mods, Src1Mods, Src2Mods;
2312d7e2303dSMatt Arsenault
23130084adc5SMatt Arsenault // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
23140084adc5SMatt Arsenault // using the conversion from f16.
2315d7e2303dSMatt Arsenault bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
2316d7e2303dSMatt Arsenault bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
2317d7e2303dSMatt Arsenault bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
2318d7e2303dSMatt Arsenault
23191024b73eSMatt Arsenault assert((IsFMA || !Mode.allFP32Denormals()) &&
2320d7e2303dSMatt Arsenault "fmad selected with denormals enabled");
2321d7e2303dSMatt Arsenault // TODO: We can select this with f32 denormals enabled if all the sources are
2322d7e2303dSMatt Arsenault // converted from f16 (in which case fmad isn't legal).
2323d7e2303dSMatt Arsenault
2324d7e2303dSMatt Arsenault if (Sel0 || Sel1 || Sel2) {
2325d7e2303dSMatt Arsenault // For dummy operands.
2326d7e2303dSMatt Arsenault SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2327d7e2303dSMatt Arsenault SDValue Ops[] = {
2328d7e2303dSMatt Arsenault CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
2329d7e2303dSMatt Arsenault CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
2330d7e2303dSMatt Arsenault CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
2331d7e2303dSMatt Arsenault CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
2332d7e2303dSMatt Arsenault Zero, Zero
2333d7e2303dSMatt Arsenault };
2334d7e2303dSMatt Arsenault
23350084adc5SMatt Arsenault CurDAG->SelectNodeTo(N,
23360084adc5SMatt Arsenault IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
23370084adc5SMatt Arsenault MVT::f32, Ops);
2338d7e2303dSMatt Arsenault } else {
2339d7e2303dSMatt Arsenault SelectCode(N);
2340d7e2303dSMatt Arsenault }
2341d7e2303dSMatt Arsenault }
2342d7e2303dSMatt Arsenault
SelectDSAppendConsume(SDNode * N,unsigned IntrID)2343d3c84e67SMatt Arsenault void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2344cdd191d9SMatt Arsenault // The address is assumed to be uniform, so if it ends up in a VGPR, it will
2345cdd191d9SMatt Arsenault // be copied to an SGPR with readfirstlane.
2346cdd191d9SMatt Arsenault unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2347cdd191d9SMatt Arsenault AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2348cdd191d9SMatt Arsenault
2349cdd191d9SMatt Arsenault SDValue Chain = N->getOperand(0);
2350cdd191d9SMatt Arsenault SDValue Ptr = N->getOperand(2);
2351cdd191d9SMatt Arsenault MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
23529e5fa333SMatt Arsenault MachineMemOperand *MMO = M->getMemOperand();
2353cdd191d9SMatt Arsenault bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
2354cdd191d9SMatt Arsenault
2355cdd191d9SMatt Arsenault SDValue Offset;
2356cdd191d9SMatt Arsenault if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2357cdd191d9SMatt Arsenault SDValue PtrBase = Ptr.getOperand(0);
2358cdd191d9SMatt Arsenault SDValue PtrOffset = Ptr.getOperand(1);
2359cdd191d9SMatt Arsenault
2360cdd191d9SMatt Arsenault const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
2361040c5027SJay Foad if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2362cdd191d9SMatt Arsenault N = glueCopyToM0(N, PtrBase);
2363cdd191d9SMatt Arsenault Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2364cdd191d9SMatt Arsenault }
2365cdd191d9SMatt Arsenault }
2366cdd191d9SMatt Arsenault
2367cdd191d9SMatt Arsenault if (!Offset) {
2368cdd191d9SMatt Arsenault N = glueCopyToM0(N, Ptr);
2369cdd191d9SMatt Arsenault Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2370cdd191d9SMatt Arsenault }
2371cdd191d9SMatt Arsenault
2372cdd191d9SMatt Arsenault SDValue Ops[] = {
2373cdd191d9SMatt Arsenault Offset,
2374cdd191d9SMatt Arsenault CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2375cdd191d9SMatt Arsenault Chain,
2376cdd191d9SMatt Arsenault N->getOperand(N->getNumOperands() - 1) // New glue
2377cdd191d9SMatt Arsenault };
2378cdd191d9SMatt Arsenault
23799e5fa333SMatt Arsenault SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
23809e5fa333SMatt Arsenault CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2381cdd191d9SMatt Arsenault }
2382cdd191d9SMatt Arsenault
gwsIntrinToOpcode(unsigned IntrID)2383740322f1SMatt Arsenault static unsigned gwsIntrinToOpcode(unsigned IntrID) {
2384740322f1SMatt Arsenault switch (IntrID) {
2385740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_init:
2386740322f1SMatt Arsenault return AMDGPU::DS_GWS_INIT;
2387740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_barrier:
2388740322f1SMatt Arsenault return AMDGPU::DS_GWS_BARRIER;
2389740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_v:
2390740322f1SMatt Arsenault return AMDGPU::DS_GWS_SEMA_V;
2391740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_br:
2392740322f1SMatt Arsenault return AMDGPU::DS_GWS_SEMA_BR;
2393740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_p:
2394740322f1SMatt Arsenault return AMDGPU::DS_GWS_SEMA_P;
2395740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_release_all:
2396740322f1SMatt Arsenault return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2397740322f1SMatt Arsenault default:
2398740322f1SMatt Arsenault llvm_unreachable("not a gws intrinsic");
2399740322f1SMatt Arsenault }
2400740322f1SMatt Arsenault }
2401740322f1SMatt Arsenault
SelectDS_GWS(SDNode * N,unsigned IntrID)24024d55d024SMatt Arsenault void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
2403740322f1SMatt Arsenault if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2404740322f1SMatt Arsenault !Subtarget->hasGWSSemaReleaseAll()) {
2405740322f1SMatt Arsenault // Let this error.
2406740322f1SMatt Arsenault SelectCode(N);
2407740322f1SMatt Arsenault return;
2408740322f1SMatt Arsenault }
2409740322f1SMatt Arsenault
2410740322f1SMatt Arsenault // Chain, intrinsic ID, vsrc, offset
2411740322f1SMatt Arsenault const bool HasVSrc = N->getNumOperands() == 4;
2412740322f1SMatt Arsenault assert(HasVSrc || N->getNumOperands() == 3);
2413740322f1SMatt Arsenault
24144d55d024SMatt Arsenault SDLoc SL(N);
2415740322f1SMatt Arsenault SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
24164d55d024SMatt Arsenault int ImmOffset = 0;
24174d55d024SMatt Arsenault MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
24184d55d024SMatt Arsenault MachineMemOperand *MMO = M->getMemOperand();
24194d55d024SMatt Arsenault
24204d55d024SMatt Arsenault // Don't worry if the offset ends up in a VGPR. Only one lane will have
24214d55d024SMatt Arsenault // effect, so SIFixSGPRCopies will validly insert readfirstlane.
24224d55d024SMatt Arsenault
24234d55d024SMatt Arsenault // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
24244d55d024SMatt Arsenault // offset field) % 64. Some versions of the programming guide omit the m0
24254d55d024SMatt Arsenault // part, or claim it's from offset 0.
24264d55d024SMatt Arsenault if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
242748c0df5dSMatt Arsenault // If we have a constant offset, try to use the 0 in m0 as the base.
242848c0df5dSMatt Arsenault // TODO: Look into changing the default m0 initialization value. If the
242948c0df5dSMatt Arsenault // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
243048c0df5dSMatt Arsenault // the immediate offset.
243148c0df5dSMatt Arsenault glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
243248c0df5dSMatt Arsenault ImmOffset = ConstOffset->getZExtValue();
24334d55d024SMatt Arsenault } else {
24344d55d024SMatt Arsenault if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
24354d55d024SMatt Arsenault ImmOffset = BaseOffset.getConstantOperandVal(1);
24364d55d024SMatt Arsenault BaseOffset = BaseOffset.getOperand(0);
24374d55d024SMatt Arsenault }
24384d55d024SMatt Arsenault
24394d55d024SMatt Arsenault // Prefer to do the shift in an SGPR since it should be possible to use m0
24404d55d024SMatt Arsenault // as the result directly. If it's already an SGPR, it will be eliminated
24414d55d024SMatt Arsenault // later.
24424d55d024SMatt Arsenault SDNode *SGPROffset
24434d55d024SMatt Arsenault = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
24444d55d024SMatt Arsenault BaseOffset);
24454d55d024SMatt Arsenault // Shift to offset in m0
24464d55d024SMatt Arsenault SDNode *M0Base
24474d55d024SMatt Arsenault = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
24484d55d024SMatt Arsenault SDValue(SGPROffset, 0),
24494d55d024SMatt Arsenault CurDAG->getTargetConstant(16, SL, MVT::i32));
24504d55d024SMatt Arsenault glueCopyToM0(N, SDValue(M0Base, 0));
24514d55d024SMatt Arsenault }
24524d55d024SMatt Arsenault
2453740322f1SMatt Arsenault SDValue Chain = N->getOperand(0);
24544d55d024SMatt Arsenault SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
24554d55d024SMatt Arsenault
2456740322f1SMatt Arsenault const unsigned Opc = gwsIntrinToOpcode(IntrID);
2457740322f1SMatt Arsenault SmallVector<SDValue, 5> Ops;
2458740322f1SMatt Arsenault if (HasVSrc)
2459bb582ebdSMatt Arsenault Ops.push_back(N->getOperand(2));
2460740322f1SMatt Arsenault Ops.push_back(OffsetField);
2461740322f1SMatt Arsenault Ops.push_back(Chain);
24624d55d024SMatt Arsenault
24634d55d024SMatt Arsenault SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
24644d55d024SMatt Arsenault CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
24654d55d024SMatt Arsenault }
24664d55d024SMatt Arsenault
SelectInterpP1F16(SDNode * N)246768b102b9SMatt Arsenault void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
246868b102b9SMatt Arsenault if (Subtarget->getLDSBankCount() != 16) {
246968b102b9SMatt Arsenault // This is a single instruction with a pattern.
247068b102b9SMatt Arsenault SelectCode(N);
247168b102b9SMatt Arsenault return;
247268b102b9SMatt Arsenault }
247368b102b9SMatt Arsenault
247468b102b9SMatt Arsenault SDLoc DL(N);
247568b102b9SMatt Arsenault
247668b102b9SMatt Arsenault // This requires 2 instructions. It is possible to write a pattern to support
247768b102b9SMatt Arsenault // this, but the generated isel emitter doesn't correctly deal with multiple
247868b102b9SMatt Arsenault // output instructions using the same physical register input. The copy to m0
247968b102b9SMatt Arsenault // is incorrectly placed before the second instruction.
248068b102b9SMatt Arsenault //
248168b102b9SMatt Arsenault // TODO: Match source modifiers.
248268b102b9SMatt Arsenault //
248368b102b9SMatt Arsenault // def : Pat <
248468b102b9SMatt Arsenault // (int_amdgcn_interp_p1_f16
248568b102b9SMatt Arsenault // (VOP3Mods f32:$src0, i32:$src0_modifiers),
248668b102b9SMatt Arsenault // (i32 timm:$attrchan), (i32 timm:$attr),
248768b102b9SMatt Arsenault // (i1 timm:$high), M0),
248868b102b9SMatt Arsenault // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
248968b102b9SMatt Arsenault // timm:$attrchan, 0,
249068b102b9SMatt Arsenault // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
249168b102b9SMatt Arsenault // let Predicates = [has16BankLDS];
249268b102b9SMatt Arsenault // }
249368b102b9SMatt Arsenault
249468b102b9SMatt Arsenault // 16 bank LDS
249568b102b9SMatt Arsenault SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
249668b102b9SMatt Arsenault N->getOperand(5), SDValue());
249768b102b9SMatt Arsenault
249868b102b9SMatt Arsenault SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
249968b102b9SMatt Arsenault
250068b102b9SMatt Arsenault SDNode *InterpMov =
250168b102b9SMatt Arsenault CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
250268b102b9SMatt Arsenault CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
250368b102b9SMatt Arsenault N->getOperand(3), // Attr
250468b102b9SMatt Arsenault N->getOperand(2), // Attrchan
250568b102b9SMatt Arsenault ToM0.getValue(1) // In glue
250668b102b9SMatt Arsenault });
250768b102b9SMatt Arsenault
250868b102b9SMatt Arsenault SDNode *InterpP1LV =
250968b102b9SMatt Arsenault CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
251068b102b9SMatt Arsenault CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
251168b102b9SMatt Arsenault N->getOperand(1), // Src0
251268b102b9SMatt Arsenault N->getOperand(3), // Attr
251368b102b9SMatt Arsenault N->getOperand(2), // Attrchan
251468b102b9SMatt Arsenault CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
251568b102b9SMatt Arsenault SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
251668b102b9SMatt Arsenault N->getOperand(4), // high
251768b102b9SMatt Arsenault CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
251868b102b9SMatt Arsenault CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
251968b102b9SMatt Arsenault SDValue(InterpMov, 1)
252068b102b9SMatt Arsenault });
252168b102b9SMatt Arsenault
252268b102b9SMatt Arsenault CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
252368b102b9SMatt Arsenault }
252468b102b9SMatt Arsenault
SelectINTRINSIC_W_CHAIN(SDNode * N)2525d3c84e67SMatt Arsenault void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
2526d3c84e67SMatt Arsenault unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2527d3c84e67SMatt Arsenault switch (IntrID) {
2528d3c84e67SMatt Arsenault case Intrinsic::amdgcn_ds_append:
2529d3c84e67SMatt Arsenault case Intrinsic::amdgcn_ds_consume: {
2530d3c84e67SMatt Arsenault if (N->getValueType(0) != MVT::i32)
2531d3c84e67SMatt Arsenault break;
2532d3c84e67SMatt Arsenault SelectDSAppendConsume(N, IntrID);
2533d3c84e67SMatt Arsenault return;
2534d3c84e67SMatt Arsenault }
25354d55d024SMatt Arsenault }
25364d55d024SMatt Arsenault
25374d55d024SMatt Arsenault SelectCode(N);
25384d55d024SMatt Arsenault }
25394d55d024SMatt Arsenault
SelectINTRINSIC_WO_CHAIN(SDNode * N)254000e89b42SCarl Ritson void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
254100e89b42SCarl Ritson unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
254200e89b42SCarl Ritson unsigned Opcode;
254300e89b42SCarl Ritson switch (IntrID) {
25440b283570SCarl Ritson case Intrinsic::amdgcn_wqm:
25450b283570SCarl Ritson Opcode = AMDGPU::WQM;
25460b283570SCarl Ritson break;
254700e89b42SCarl Ritson case Intrinsic::amdgcn_softwqm:
254800e89b42SCarl Ritson Opcode = AMDGPU::SOFT_WQM;
254900e89b42SCarl Ritson break;
25500b283570SCarl Ritson case Intrinsic::amdgcn_wwm:
2551c3ce7baeSPiotr Sobczak case Intrinsic::amdgcn_strict_wwm:
2552c3ce7baeSPiotr Sobczak Opcode = AMDGPU::STRICT_WWM;
25530b283570SCarl Ritson break;
25544672bac1SPiotr Sobczak case Intrinsic::amdgcn_strict_wqm:
25554672bac1SPiotr Sobczak Opcode = AMDGPU::STRICT_WQM;
25564672bac1SPiotr Sobczak break;
255768b102b9SMatt Arsenault case Intrinsic::amdgcn_interp_p1_f16:
255868b102b9SMatt Arsenault SelectInterpP1F16(N);
255968b102b9SMatt Arsenault return;
256000e89b42SCarl Ritson default:
256100e89b42SCarl Ritson SelectCode(N);
256200e89b42SCarl Ritson return;
256300e89b42SCarl Ritson }
256400e89b42SCarl Ritson
256500e89b42SCarl Ritson SDValue Src = N->getOperand(1);
256600e89b42SCarl Ritson CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
256700e89b42SCarl Ritson }
256800e89b42SCarl Ritson
SelectINTRINSIC_VOID(SDNode * N)25694d55d024SMatt Arsenault void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
25704d55d024SMatt Arsenault unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
25714d55d024SMatt Arsenault switch (IntrID) {
25724d55d024SMatt Arsenault case Intrinsic::amdgcn_ds_gws_init:
25734d55d024SMatt Arsenault case Intrinsic::amdgcn_ds_gws_barrier:
2574740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_v:
2575740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_br:
2576740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_p:
2577740322f1SMatt Arsenault case Intrinsic::amdgcn_ds_gws_sema_release_all:
25784d55d024SMatt Arsenault SelectDS_GWS(N, IntrID);
25794d55d024SMatt Arsenault return;
2580d3c84e67SMatt Arsenault default:
2581d3c84e67SMatt Arsenault break;
2582d3c84e67SMatt Arsenault }
2583d3c84e67SMatt Arsenault
2584d3c84e67SMatt Arsenault SelectCode(N);
2585d3c84e67SMatt Arsenault }
2586d3c84e67SMatt Arsenault
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool AllowAbs) const2587d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
25885b91a6a8SJay Foad unsigned &Mods,
25895b91a6a8SJay Foad bool AllowAbs) const {
2590d7e2303dSMatt Arsenault Mods = 0;
259145bb48eaSTom Stellard Src = In;
259245bb48eaSTom Stellard
259345bb48eaSTom Stellard if (Src.getOpcode() == ISD::FNEG) {
259445bb48eaSTom Stellard Mods |= SISrcMods::NEG;
259545bb48eaSTom Stellard Src = Src.getOperand(0);
259645bb48eaSTom Stellard }
259745bb48eaSTom Stellard
25985b91a6a8SJay Foad if (AllowAbs && Src.getOpcode() == ISD::FABS) {
259945bb48eaSTom Stellard Mods |= SISrcMods::ABS;
260045bb48eaSTom Stellard Src = Src.getOperand(0);
260145bb48eaSTom Stellard }
260245bb48eaSTom Stellard
2603d7e2303dSMatt Arsenault return true;
2604d7e2303dSMatt Arsenault }
2605d7e2303dSMatt Arsenault
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const2606d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
2607d7e2303dSMatt Arsenault SDValue &SrcMods) const {
2608d7e2303dSMatt Arsenault unsigned Mods;
2609d7e2303dSMatt Arsenault if (SelectVOP3ModsImpl(In, Src, Mods)) {
261045bb48eaSTom Stellard SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
261145bb48eaSTom Stellard return true;
261245bb48eaSTom Stellard }
261345bb48eaSTom Stellard
2614d7e2303dSMatt Arsenault return false;
2615d7e2303dSMatt Arsenault }
2616d7e2303dSMatt Arsenault
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const26175b91a6a8SJay Foad bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
26185b91a6a8SJay Foad SDValue &SrcMods) const {
26195b91a6a8SJay Foad unsigned Mods;
26205b91a6a8SJay Foad if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
26215b91a6a8SJay Foad SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
26225b91a6a8SJay Foad return true;
26235b91a6a8SJay Foad }
26245b91a6a8SJay Foad
26255b91a6a8SJay Foad return false;
26265b91a6a8SJay Foad }
26275b91a6a8SJay Foad
SelectVOP3Mods_NNaN(SDValue In,SDValue & Src,SDValue & SrcMods) const2628f84e5d9aSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
2629f84e5d9aSMatt Arsenault SDValue &SrcMods) const {
2630f84e5d9aSMatt Arsenault SelectVOP3Mods(In, Src, SrcMods);
2631f84e5d9aSMatt Arsenault return isNoNanSrc(Src);
2632f84e5d9aSMatt Arsenault }
2633f84e5d9aSMatt Arsenault
SelectVOP3NoMods(SDValue In,SDValue & Src) const2634df58e825SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
2635df58e825SMatt Arsenault if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
2636df58e825SMatt Arsenault return false;
2637df58e825SMatt Arsenault
2638df58e825SMatt Arsenault Src = In;
2639df58e825SMatt Arsenault return true;
2640db5a11f6STom Stellard }
2641db5a11f6STom Stellard
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const264220d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
264320d20156SJoe Nash SDValue &SrcMods,
264420d20156SJoe Nash bool OpSel) const {
264520d20156SJoe Nash unsigned Mods;
264620d20156SJoe Nash if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
264720d20156SJoe Nash if (OpSel)
264820d20156SJoe Nash Mods |= SISrcMods::OP_SEL_0;
264920d20156SJoe Nash SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
265020d20156SJoe Nash return true;
265120d20156SJoe Nash }
265220d20156SJoe Nash
265320d20156SJoe Nash return false;
265420d20156SJoe Nash }
265520d20156SJoe Nash
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const265620d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
265720d20156SJoe Nash SDValue &SrcMods) const {
265820d20156SJoe Nash return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
265920d20156SJoe Nash }
266020d20156SJoe Nash
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const266120d20156SJoe Nash bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
266220d20156SJoe Nash SDValue &SrcMods) const {
266320d20156SJoe Nash return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
266420d20156SJoe Nash }
266520d20156SJoe Nash
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const266645bb48eaSTom Stellard bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
266745bb48eaSTom Stellard SDValue &SrcMods, SDValue &Clamp,
266845bb48eaSTom Stellard SDValue &Omod) const {
266945bb48eaSTom Stellard SDLoc DL(In);
2670df58e825SMatt Arsenault Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2671df58e825SMatt Arsenault Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
267245bb48eaSTom Stellard
267345bb48eaSTom Stellard return SelectVOP3Mods(In, Src, SrcMods);
267445bb48eaSTom Stellard }
267545bb48eaSTom Stellard
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const26765b91a6a8SJay Foad bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
26775b91a6a8SJay Foad SDValue &SrcMods, SDValue &Clamp,
26785b91a6a8SJay Foad SDValue &Omod) const {
26795b91a6a8SJay Foad SDLoc DL(In);
26805b91a6a8SJay Foad Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
26815b91a6a8SJay Foad Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
26825b91a6a8SJay Foad
26835b91a6a8SJay Foad return SelectVOP3BMods(In, Src, SrcMods);
26845b91a6a8SJay Foad }
26855b91a6a8SJay Foad
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const2686c512d448SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
2687c512d448SDmitry Preobrazhensky SDValue &Clamp, SDValue &Omod) const {
2688c512d448SDmitry Preobrazhensky Src = In;
2689c512d448SDmitry Preobrazhensky
2690c512d448SDmitry Preobrazhensky SDLoc DL(In);
2691df58e825SMatt Arsenault Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
2692df58e825SMatt Arsenault Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
2693c512d448SDmitry Preobrazhensky
2694c512d448SDmitry Preobrazhensky return true;
2695c512d448SDmitry Preobrazhensky }
2696c512d448SDmitry Preobrazhensky
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const2697eb522e68SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2698c4500de2SStanislav Mekhanoshin SDValue &SrcMods, bool IsDOT) const {
2699eb522e68SMatt Arsenault unsigned Mods = 0;
2700eb522e68SMatt Arsenault Src = In;
2701eb522e68SMatt Arsenault
2702eb522e68SMatt Arsenault if (Src.getOpcode() == ISD::FNEG) {
2703786eeea2SMatt Arsenault Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
2704eb522e68SMatt Arsenault Src = Src.getOperand(0);
2705eb522e68SMatt Arsenault }
2706eb522e68SMatt Arsenault
2707c4500de2SStanislav Mekhanoshin if (Src.getOpcode() == ISD::BUILD_VECTOR &&
2708c4500de2SStanislav Mekhanoshin (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
2709786eeea2SMatt Arsenault unsigned VecMods = Mods;
2710786eeea2SMatt Arsenault
271198f2946aSMatt Arsenault SDValue Lo = stripBitcast(Src.getOperand(0));
271298f2946aSMatt Arsenault SDValue Hi = stripBitcast(Src.getOperand(1));
2713786eeea2SMatt Arsenault
2714786eeea2SMatt Arsenault if (Lo.getOpcode() == ISD::FNEG) {
271598f2946aSMatt Arsenault Lo = stripBitcast(Lo.getOperand(0));
2716786eeea2SMatt Arsenault Mods ^= SISrcMods::NEG;
2717786eeea2SMatt Arsenault }
2718786eeea2SMatt Arsenault
2719786eeea2SMatt Arsenault if (Hi.getOpcode() == ISD::FNEG) {
272098f2946aSMatt Arsenault Hi = stripBitcast(Hi.getOperand(0));
2721786eeea2SMatt Arsenault Mods ^= SISrcMods::NEG_HI;
2722786eeea2SMatt Arsenault }
2723786eeea2SMatt Arsenault
272498f2946aSMatt Arsenault if (isExtractHiElt(Lo, Lo))
272598f2946aSMatt Arsenault Mods |= SISrcMods::OP_SEL_0;
272698f2946aSMatt Arsenault
272798f2946aSMatt Arsenault if (isExtractHiElt(Hi, Hi))
272898f2946aSMatt Arsenault Mods |= SISrcMods::OP_SEL_1;
272998f2946aSMatt Arsenault
2730a8d9d507SStanislav Mekhanoshin unsigned VecSize = Src.getValueSizeInBits();
273198f2946aSMatt Arsenault Lo = stripExtractLoElt(Lo);
273298f2946aSMatt Arsenault Hi = stripExtractLoElt(Hi);
273398f2946aSMatt Arsenault
2734a8d9d507SStanislav Mekhanoshin if (Lo.getValueSizeInBits() > VecSize) {
2735a8d9d507SStanislav Mekhanoshin Lo = CurDAG->getTargetExtractSubreg(
2736a8d9d507SStanislav Mekhanoshin (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2737a8d9d507SStanislav Mekhanoshin MVT::getIntegerVT(VecSize), Lo);
2738a8d9d507SStanislav Mekhanoshin }
2739a8d9d507SStanislav Mekhanoshin
2740a8d9d507SStanislav Mekhanoshin if (Hi.getValueSizeInBits() > VecSize) {
2741a8d9d507SStanislav Mekhanoshin Hi = CurDAG->getTargetExtractSubreg(
2742a8d9d507SStanislav Mekhanoshin (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
2743a8d9d507SStanislav Mekhanoshin MVT::getIntegerVT(VecSize), Hi);
2744a8d9d507SStanislav Mekhanoshin }
2745a8d9d507SStanislav Mekhanoshin
2746a8d9d507SStanislav Mekhanoshin assert(Lo.getValueSizeInBits() <= VecSize &&
2747a8d9d507SStanislav Mekhanoshin Hi.getValueSizeInBits() <= VecSize);
2748a8d9d507SStanislav Mekhanoshin
2749786eeea2SMatt Arsenault if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
2750786eeea2SMatt Arsenault // Really a scalar input. Just select from the low half of the register to
2751786eeea2SMatt Arsenault // avoid packing.
2752786eeea2SMatt Arsenault
2753a8d9d507SStanislav Mekhanoshin if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
2754786eeea2SMatt Arsenault Src = Lo;
2755a8d9d507SStanislav Mekhanoshin } else {
2756a8d9d507SStanislav Mekhanoshin assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
2757a8d9d507SStanislav Mekhanoshin
2758a8d9d507SStanislav Mekhanoshin SDLoc SL(In);
2759a8d9d507SStanislav Mekhanoshin SDValue Undef = SDValue(
2760a8d9d507SStanislav Mekhanoshin CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
2761a8d9d507SStanislav Mekhanoshin Lo.getValueType()), 0);
2762a8d9d507SStanislav Mekhanoshin auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
2763a8d9d507SStanislav Mekhanoshin : AMDGPU::SReg_64RegClassID;
2764a8d9d507SStanislav Mekhanoshin const SDValue Ops[] = {
2765a8d9d507SStanislav Mekhanoshin CurDAG->getTargetConstant(RC, SL, MVT::i32),
2766a8d9d507SStanislav Mekhanoshin Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2767a8d9d507SStanislav Mekhanoshin Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
2768a8d9d507SStanislav Mekhanoshin
2769a8d9d507SStanislav Mekhanoshin Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
2770a8d9d507SStanislav Mekhanoshin Src.getValueType(), Ops), 0);
2771a8d9d507SStanislav Mekhanoshin }
2772786eeea2SMatt Arsenault SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2773786eeea2SMatt Arsenault return true;
2774786eeea2SMatt Arsenault }
2775786eeea2SMatt Arsenault
2776a8d9d507SStanislav Mekhanoshin if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
2777a8d9d507SStanislav Mekhanoshin uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
2778a8d9d507SStanislav Mekhanoshin .bitcastToAPInt().getZExtValue();
2779a8d9d507SStanislav Mekhanoshin if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
2780a8d9d507SStanislav Mekhanoshin Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
2781a8d9d507SStanislav Mekhanoshin SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2782a8d9d507SStanislav Mekhanoshin return true;
2783a8d9d507SStanislav Mekhanoshin }
2784a8d9d507SStanislav Mekhanoshin }
2785a8d9d507SStanislav Mekhanoshin
2786786eeea2SMatt Arsenault Mods = VecMods;
2787786eeea2SMatt Arsenault }
2788786eeea2SMatt Arsenault
2789eb522e68SMatt Arsenault // Packed instructions do not have abs modifiers.
2790eb522e68SMatt Arsenault Mods |= SISrcMods::OP_SEL_1;
2791eb522e68SMatt Arsenault
2792eb522e68SMatt Arsenault SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2793eb522e68SMatt Arsenault return true;
2794eb522e68SMatt Arsenault }
2795eb522e68SMatt Arsenault
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const2796c4500de2SStanislav Mekhanoshin bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2797c4500de2SStanislav Mekhanoshin SDValue &SrcMods) const {
2798c4500de2SStanislav Mekhanoshin return SelectVOP3PMods(In, Src, SrcMods, true);
2799c4500de2SStanislav Mekhanoshin }
2800c4500de2SStanislav Mekhanoshin
SelectDotIUVOP3PMods(SDValue In,SDValue & Src) const28012d43de13SJoe Nash bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
28022d43de13SJoe Nash const ConstantSDNode *C = cast<ConstantSDNode>(In);
28032d43de13SJoe Nash // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
28042d43de13SJoe Nash // 1 promotes packed values to signed, 0 treats them as unsigned.
28052d43de13SJoe Nash assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
28062d43de13SJoe Nash
28072d43de13SJoe Nash unsigned Mods = SISrcMods::OP_SEL_1;
28082d43de13SJoe Nash unsigned SrcSign = C->getAPIntValue().getZExtValue();
28092d43de13SJoe Nash if (SrcSign == 1)
28102d43de13SJoe Nash Mods ^= SISrcMods::NEG;
28112d43de13SJoe Nash
28122d43de13SJoe Nash Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28132d43de13SJoe Nash return true;
28142d43de13SJoe Nash }
28152d43de13SJoe Nash
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const28164874838aSPiotr Sobczak bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
28174874838aSPiotr Sobczak SDValue &Src) const {
28184874838aSPiotr Sobczak const ConstantSDNode *C = cast<ConstantSDNode>(In);
28194874838aSPiotr Sobczak assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
28204874838aSPiotr Sobczak
28214874838aSPiotr Sobczak unsigned Mods = SISrcMods::OP_SEL_1;
28224874838aSPiotr Sobczak unsigned SrcVal = C->getAPIntValue().getZExtValue();
28234874838aSPiotr Sobczak if (SrcVal == 1)
28244874838aSPiotr Sobczak Mods |= SISrcMods::OP_SEL_0;
28254874838aSPiotr Sobczak
28264874838aSPiotr Sobczak Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
28274874838aSPiotr Sobczak return true;
28284874838aSPiotr Sobczak }
28294874838aSPiotr Sobczak
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const2830abf28394SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
2831abf28394SDmitry Preobrazhensky SDValue &SrcMods) const {
2832abf28394SDmitry Preobrazhensky Src = In;
2833abf28394SDmitry Preobrazhensky // FIXME: Handle op_sel
2834abf28394SDmitry Preobrazhensky SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
2835abf28394SDmitry Preobrazhensky return true;
2836abf28394SDmitry Preobrazhensky }
2837abf28394SDmitry Preobrazhensky
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2838abf28394SDmitry Preobrazhensky bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
2839abf28394SDmitry Preobrazhensky SDValue &SrcMods) const {
2840abf28394SDmitry Preobrazhensky // FIXME: Handle op_sel
2841abf28394SDmitry Preobrazhensky return SelectVOP3Mods(In, Src, SrcMods);
2842abf28394SDmitry Preobrazhensky }
2843abf28394SDmitry Preobrazhensky
2844d7e2303dSMatt Arsenault // The return value is not whether the match is possible (which it always is),
2845d7e2303dSMatt Arsenault // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const2846d7e2303dSMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
2847d7e2303dSMatt Arsenault unsigned &Mods) const {
2848d7e2303dSMatt Arsenault Mods = 0;
2849d7e2303dSMatt Arsenault SelectVOP3ModsImpl(In, Src, Mods);
2850d7e2303dSMatt Arsenault
2851d7e2303dSMatt Arsenault if (Src.getOpcode() == ISD::FP_EXTEND) {
2852d7e2303dSMatt Arsenault Src = Src.getOperand(0);
2853d7e2303dSMatt Arsenault assert(Src.getValueType() == MVT::f16);
2854d7e2303dSMatt Arsenault Src = stripBitcast(Src);
2855d7e2303dSMatt Arsenault
2856550c66d1SMatt Arsenault // Be careful about folding modifiers if we already have an abs. fneg is
2857550c66d1SMatt Arsenault // applied last, so we don't want to apply an earlier fneg.
2858550c66d1SMatt Arsenault if ((Mods & SISrcMods::ABS) == 0) {
2859550c66d1SMatt Arsenault unsigned ModsTmp;
2860550c66d1SMatt Arsenault SelectVOP3ModsImpl(Src, Src, ModsTmp);
2861550c66d1SMatt Arsenault
2862550c66d1SMatt Arsenault if ((ModsTmp & SISrcMods::NEG) != 0)
2863550c66d1SMatt Arsenault Mods ^= SISrcMods::NEG;
2864550c66d1SMatt Arsenault
2865550c66d1SMatt Arsenault if ((ModsTmp & SISrcMods::ABS) != 0)
2866550c66d1SMatt Arsenault Mods |= SISrcMods::ABS;
2867550c66d1SMatt Arsenault }
2868550c66d1SMatt Arsenault
2869d7e2303dSMatt Arsenault // op_sel/op_sel_hi decide the source type and source.
2870d7e2303dSMatt Arsenault // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
2871d7e2303dSMatt Arsenault // If the sources's op_sel is set, it picks the high half of the source
2872d7e2303dSMatt Arsenault // register.
2873d7e2303dSMatt Arsenault
2874d7e2303dSMatt Arsenault Mods |= SISrcMods::OP_SEL_1;
2875550c66d1SMatt Arsenault if (isExtractHiElt(Src, Src)) {
2876d7e2303dSMatt Arsenault Mods |= SISrcMods::OP_SEL_0;
2877d7e2303dSMatt Arsenault
2878550c66d1SMatt Arsenault // TODO: Should we try to look for neg/abs here?
2879550c66d1SMatt Arsenault }
2880550c66d1SMatt Arsenault
2881d7e2303dSMatt Arsenault return true;
2882d7e2303dSMatt Arsenault }
2883d7e2303dSMatt Arsenault
2884d7e2303dSMatt Arsenault return false;
2885d7e2303dSMatt Arsenault }
2886d7e2303dSMatt Arsenault
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const288776935122SMatt Arsenault bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
288876935122SMatt Arsenault SDValue &SrcMods) const {
288976935122SMatt Arsenault unsigned Mods = 0;
289076935122SMatt Arsenault SelectVOP3PMadMixModsImpl(In, Src, Mods);
289176935122SMatt Arsenault SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
289276935122SMatt Arsenault return true;
289376935122SMatt Arsenault }
289476935122SMatt Arsenault
getHi16Elt(SDValue In) const2895e8c03a25SMatt Arsenault SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
2896e8c03a25SMatt Arsenault if (In.isUndef())
2897e8c03a25SMatt Arsenault return CurDAG->getUNDEF(MVT::i32);
2898e8c03a25SMatt Arsenault
2899e8c03a25SMatt Arsenault if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
2900e8c03a25SMatt Arsenault SDLoc SL(In);
2901e8c03a25SMatt Arsenault return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
2902e8c03a25SMatt Arsenault }
2903e8c03a25SMatt Arsenault
2904e8c03a25SMatt Arsenault if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
2905e8c03a25SMatt Arsenault SDLoc SL(In);
2906e8c03a25SMatt Arsenault return CurDAG->getConstant(
2907e8c03a25SMatt Arsenault C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
2908e8c03a25SMatt Arsenault }
2909e8c03a25SMatt Arsenault
2910e8c03a25SMatt Arsenault SDValue Src;
2911e8c03a25SMatt Arsenault if (isExtractHiElt(In, Src))
2912e8c03a25SMatt Arsenault return Src;
2913e8c03a25SMatt Arsenault
2914e8c03a25SMatt Arsenault return SDValue();
2915e8c03a25SMatt Arsenault }
2916e8c03a25SMatt Arsenault
isVGPRImm(const SDNode * N) const2917db7ee766SAlexander Timofeev bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
2918e4c2e9b0SMatt Arsenault assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
2919e4c2e9b0SMatt Arsenault
2920db7ee766SAlexander Timofeev const SIRegisterInfo *SIRI =
2921db7ee766SAlexander Timofeev static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
2922db7ee766SAlexander Timofeev const SIInstrInfo * SII =
2923db7ee766SAlexander Timofeev static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
2924db7ee766SAlexander Timofeev
2925db7ee766SAlexander Timofeev unsigned Limit = 0;
2926db7ee766SAlexander Timofeev bool AllUsesAcceptSReg = true;
2927db7ee766SAlexander Timofeev for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
2928db7ee766SAlexander Timofeev Limit < 10 && U != E; ++U, ++Limit) {
2929db7ee766SAlexander Timofeev const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
2930db7ee766SAlexander Timofeev
2931db7ee766SAlexander Timofeev // If the register class is unknown, it could be an unknown
2932db7ee766SAlexander Timofeev // register class that needs to be an SGPR, e.g. an inline asm
2933db7ee766SAlexander Timofeev // constraint
2934db7ee766SAlexander Timofeev if (!RC || SIRI->isSGPRClass(RC))
2935db7ee766SAlexander Timofeev return false;
2936db7ee766SAlexander Timofeev
2937db7ee766SAlexander Timofeev if (RC != &AMDGPU::VS_32RegClass) {
2938db7ee766SAlexander Timofeev AllUsesAcceptSReg = false;
2939db7ee766SAlexander Timofeev SDNode * User = *U;
2940db7ee766SAlexander Timofeev if (User->isMachineOpcode()) {
2941db7ee766SAlexander Timofeev unsigned Opc = User->getMachineOpcode();
2942db7ee766SAlexander Timofeev MCInstrDesc Desc = SII->get(Opc);
2943db7ee766SAlexander Timofeev if (Desc.isCommutable()) {
2944db7ee766SAlexander Timofeev unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
2945db7ee766SAlexander Timofeev unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
2946db7ee766SAlexander Timofeev if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
2947db7ee766SAlexander Timofeev unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
2948db7ee766SAlexander Timofeev const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
2949db7ee766SAlexander Timofeev if (CommutedRC == &AMDGPU::VS_32RegClass)
2950db7ee766SAlexander Timofeev AllUsesAcceptSReg = true;
2951db7ee766SAlexander Timofeev }
2952db7ee766SAlexander Timofeev }
2953db7ee766SAlexander Timofeev }
29546527b2a4SSebastian Neubauer // If "AllUsesAcceptSReg == false" so far we haven't succeeded
2955db7ee766SAlexander Timofeev // commuting current user. This means have at least one use
2956db7ee766SAlexander Timofeev // that strictly require VGPR. Thus, we will not attempt to commute
2957db7ee766SAlexander Timofeev // other user instructions.
2958db7ee766SAlexander Timofeev if (!AllUsesAcceptSReg)
2959db7ee766SAlexander Timofeev break;
2960db7ee766SAlexander Timofeev }
2961db7ee766SAlexander Timofeev }
2962db7ee766SAlexander Timofeev return !AllUsesAcceptSReg && (Limit < 10);
2963db7ee766SAlexander Timofeev }
2964db7ee766SAlexander Timofeev
isUniformLoad(const SDNode * N) const29654d302f69SAlexander Timofeev bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
29664d302f69SAlexander Timofeev auto Ld = cast<LoadSDNode>(N);
29674d302f69SAlexander Timofeev
296807881861SGuillaume Chatelet return Ld->getAlign() >= Align(4) &&
296907881861SGuillaume Chatelet (((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
297007881861SGuillaume Chatelet Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
297107881861SGuillaume Chatelet !N->isDivergent()) ||
297207881861SGuillaume Chatelet (Subtarget->getScalarizeGlobalBehavior() &&
29734d302f69SAlexander Timofeev Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
297407881861SGuillaume Chatelet Ld->isSimple() && !N->isDivergent() &&
297507881861SGuillaume Chatelet static_cast<const SITargetLowering *>(getTargetLowering())
297607881861SGuillaume Chatelet ->isMemOpHasNoClobberedMemOperand(N)));
29774d302f69SAlexander Timofeev }
2978db7ee766SAlexander Timofeev
PostprocessISelDAG()297945bb48eaSTom Stellard void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
298045bb48eaSTom Stellard const AMDGPUTargetLowering& Lowering =
298145bb48eaSTom Stellard *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
298245bb48eaSTom Stellard bool IsModified = false;
298345bb48eaSTom Stellard do {
298445bb48eaSTom Stellard IsModified = false;
298568f05052SMatt Arsenault
298645bb48eaSTom Stellard // Go over all selected nodes and try to fold them a bit more
298768f05052SMatt Arsenault SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
298868f05052SMatt Arsenault while (Position != CurDAG->allnodes_end()) {
298968f05052SMatt Arsenault SDNode *Node = &*Position++;
299068f05052SMatt Arsenault MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
299145bb48eaSTom Stellard if (!MachineNode)
299245bb48eaSTom Stellard continue;
299345bb48eaSTom Stellard
299445bb48eaSTom Stellard SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
299568f05052SMatt Arsenault if (ResNode != Node) {
299668f05052SMatt Arsenault if (ResNode)
299768f05052SMatt Arsenault ReplaceUses(Node, ResNode);
299845bb48eaSTom Stellard IsModified = true;
299945bb48eaSTom Stellard }
300045bb48eaSTom Stellard }
300145bb48eaSTom Stellard CurDAG->RemoveDeadNodes();
300245bb48eaSTom Stellard } while (IsModified);
300345bb48eaSTom Stellard }
3004