1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUInstrInfo.h"
19 #include "AMDGPURegisterInfo.h"
20 #include "AMDGPUSubtarget.h"
21 #include "SIDefines.h"
22 #include "SIISelLowering.h"
23 #include "SIInstrInfo.h"
24 #include "SIMachineFunctionInfo.h"
25 #include "SIRegisterInfo.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/CodeGen/FunctionLoweringInfo.h"
31 #include "llvm/CodeGen/ISDOpcodes.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/MachineValueType.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGISel.h"
37 #include "llvm/CodeGen/SelectionDAGNodes.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/MC/MCInstrDesc.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CodeGen.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/MathExtras.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <new>
49 #include <vector>
50 
51 using namespace llvm;
52 
53 namespace llvm {
54 
55 class R600InstrInfo;
56 
57 } // end namespace llvm
58 
59 //===----------------------------------------------------------------------===//
60 // Instruction Selector Implementation
61 //===----------------------------------------------------------------------===//
62 
63 namespace {
64 
65 /// AMDGPU specific code to select AMDGPU machine instructions for
66 /// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

public:
  // NOTE(review): TM defaults to nullptr but is unconditionally dereferenced
  // in the initializer list and body below. Every caller must pass a valid
  // TargetMachine; the default exists only to satisfy pass-registration
  // machinery — confirm no default-constructed instances are ever created.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Argument usage info must be computed before selection runs.
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  // R600-only operand folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local memory) addressing-mode selection.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer) addressing-mode selection helpers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Root,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Root,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing-mode selection helpers.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory read) addressing-mode selection helpers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3/VOP3P source-modifier (mods/clamp/omod/op_sel) selection helpers.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;

  // Custom selection for nodes the generated matcher cannot handle directly.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
220 
/// Instruction selector for the R600 backend. It reuses the shared AMDGPU
/// selection machinery and overrides only Select and the R600-specific
/// addressing-mode hooks.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};
233 
234 }  // end anonymous namespace
235 
// Register the pass and declare its dependency on AMDGPUArgumentUsageInfo so
// the pass manager initializes that analysis before selection runs.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
241 
/// \brief This pass converts a legalized DAG into a AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
248 
/// \brief This pass converts a legalized DAG into a R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}
255 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for the current function before delegating; it can
  // differ per function (e.g. via target attributes).
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
260 
/// \returns true if \p N is known never to be a NaN: either because the
/// whole module is compiled with no-NaNs FP math, the node carries the
/// nnan fast-math flag, or the DAG can prove it.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}
271 
272 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
273   const SIInstrInfo *TII
274     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
275 
276   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
277     return TII->isInlineConstant(C->getAPIntValue());
278 
279   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
280     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
281 
282   return false;
283 }
284 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    // Of the pre-selection nodes, only CopyToReg names a register whose
    // class we can recover.
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      // Physical register: ask the SI register info for its class.
      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine node: look the operand up in the instruction
    // description. OpNo counts use operands only, so skip the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE: operand 0 is the register-class id, followed by
    // (value, subreg-index) pairs. Restrict the super-class by the
    // operand's subregister index.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
332 
333 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
334   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
335     return N;
336 
337   const SITargetLowering& Lowering =
338       *static_cast<const SITargetLowering*>(getTargetLowering());
339 
340   // Write max value to m0 before each load operation
341 
342   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
343                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
344 
345   SDValue Glue = M0.getValue(1);
346 
347   SmallVector <SDValue, 8> Ops;
348   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
349      Ops.push_back(N->getOperand(i));
350   }
351   Ops.push_back(Glue);
352   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
353 
354   return N;
355 }
356 
357 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
358   switch (NumVectorElts) {
359   case 1:
360     return AMDGPU::SReg_32_XM0RegClassID;
361   case 2:
362     return AMDGPU::SReg_64RegClassID;
363   case 4:
364     return AMDGPU::SReg_128RegClassID;
365   case 8:
366     return AMDGPU::SReg_256RegClassID;
367   case 16:
368     return AMDGPU::SReg_512RegClassID;
369   }
370 
371   llvm_unreachable("invalid vector size");
372 }
373 
374 static bool getConstantValue(SDValue N, uint32_t &Out) {
375   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
376     Out = C->getAPIntValue().getZExtValue();
377     return true;
378   }
379 
380   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
381     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
382     return true;
383   }
384 
385   return false;
386 }
387 
388 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
389   EVT VT = N->getValueType(0);
390   unsigned NumVectorElts = VT.getVectorNumElements();
391   EVT EltVT = VT.getVectorElementType();
392   const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
393   SDLoc DL(N);
394   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
395 
396   if (NumVectorElts == 1) {
397     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
398                          RegClass);
399     return;
400   }
401 
402   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
403                                   "supported yet");
404   // 16 = Max Num Vector Elements
405   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
406   // 1 = Vector Register Class
407   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
408 
409   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
410   bool IsRegSeq = true;
411   unsigned NOps = N->getNumOperands();
412   for (unsigned i = 0; i < NOps; i++) {
413     // XXX: Why is this here?
414     if (isa<RegisterSDNode>(N->getOperand(i))) {
415       IsRegSeq = false;
416       break;
417     }
418     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
419     RegSeqArgs[1 + (2 * i) + 1] =
420             CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
421                                       MVT::i32);
422   }
423   if (NOps != NumVectorElts) {
424     // Fill in the missing undef elements if this was a scalar_to_vector.
425     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
426     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
427                                                    DL, EltVT);
428     for (unsigned i = NOps; i < NumVectorElts; ++i) {
429       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
430       RegSeqArgs[1 + (2 * i) + 1] =
431         CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
432     }
433   }
434 
435   if (!IsRegSeq)
436     SelectCode(N);
437   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
438 }
439 
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics (and the AMDGPU inc/dec atomics) may touch local memory, which
  // requires m0 to be initialized first; glueCopyToM0 handles the address
  // space check itself.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        // Pack two constant 16-bit elements into one 32-bit immediate and
        // materialize it with a single S_MOV_B32.
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into a REG_SEQUENCE over the matching SReg
    // class, using sub0/sub1 (i64) or sub0_sub1/sub2_sub3 (i128) indices.
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit constants that cannot be inline immediates are materialized as
    // two 32-bit S_MOV_B32 halves combined with a REG_SEQUENCE.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory accesses need m0 glued in; the node still goes through
    // the generated matcher afterwards.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These can sometimes be turned into S_BFE_* bitfield extracts.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
    SelectFMAD(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Anything not handled above goes through the tablegen-generated matcher.
  SelectCode(N);
}
622 
623 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
624   if (!N->readMem())
625     return false;
626   if (CbId == -1)
627     return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
628 
629   return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
630 }
631 
632 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
633   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
634   const Instruction *Term = BB->getTerminator();
635   return Term->getMetadata("amdgpu.uniform") ||
636          Term->getMetadata("structurizecfg.uniform");
637 }
638 
// Human-readable pass name shown in -debug-pass and timing reports.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
642 
643 //===----------------------------------------------------------------------===//
644 // Complex Patterns
645 //===----------------------------------------------------------------------===//
646 
647 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
648                                                          SDValue& IntPtr) {
649   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
650     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
651                                        true);
652     return true;
653   }
654   return false;
655 }
656 
657 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
658     SDValue& BaseReg, SDValue &Offset) {
659   if (!isa<ConstantSDNode>(Addr)) {
660     BaseReg = Addr;
661     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
662     return true;
663   }
664   return false;
665 }
666 
// VTX_READ addressing is R600-specific; the base selector never matches it
// (R600DAGToDAGISel overrides this).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
671 
672 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
673                                             SDValue &Offset) {
674   ConstantSDNode *C;
675   SDLoc DL(Addr);
676 
677   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
678     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
679     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
680   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
681              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
682     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
683     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
684   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
685             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
686     Base = Addr.getOperand(0);
687     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
688   } else {
689     Base = Addr;
690     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
691   }
692 
693   return true;
694 }
695 
// Expand a 64-bit scalar add/sub into a low 32-bit op plus a high 32-bit op
// that consumes the low half's carry, then reassemble the result with a
// REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; those plus ADDC/SUBC also produce
  // a carry-out that users of result #1 observe.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // Each half produces (i32 result, glue carry).
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: use the carry-consuming opcode only when N itself has an
  // incoming carry operand (operand 2).
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the low half's carry (glue value #1).
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}
760 
761 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
762   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
763   // carry out despite the _i32 name. These were renamed in VI to _U32.
764   // FIXME: We should probably rename the opcodes here.
765   unsigned Opc = N->getOpcode() == ISD::UADDO ?
766     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
767 
768   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
769                        { N->getOperand(0), N->getOperand(1) });
770 }
771 
772 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
773   SDLoc SL(N);
774   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
775   SDValue Ops[10];
776 
777   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
778   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
779   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
780   Ops[8] = N->getOperand(0);
781   Ops[9] = N->getOperand(4);
782 
783   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
784 }
785 
786 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
787   SDLoc SL(N);
788   //	src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
789   SDValue Ops[8];
790 
791   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
792   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
793   Ops[6] = N->getOperand(0);
794   Ops[7] = N->getOperand(3);
795 
796   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
797 }
798 
799 // We need to handle this here because tablegen doesn't support matching
800 // instructions with multiple outputs.
801 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
802   SDLoc SL(N);
803   EVT VT = N->getValueType(0);
804 
805   assert(VT == MVT::f32 || VT == MVT::f64);
806 
807   unsigned Opc
808     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
809 
810   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
811   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
812 }
813 
814 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
815                                          unsigned OffsetBits) const {
816   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
817       (OffsetBits == 8 && !isUInt<8>(Offset)))
818     return false;
819 
820   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
821       Subtarget->unsafeDSOffsetFoldingEnabled())
822     return true;
823 
824   // On Southern Islands instruction with a negative base value and an offset
825   // don't seem to work.
826   return CurDAG->SignBitIsZero(Base);
827 }
828 
829 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
830                                               SDValue &Offset) const {
831   SDLoc DL(Addr);
832   if (CurDAG->isBaseWithConstantOffset(Addr)) {
833     SDValue N0 = Addr.getOperand(0);
834     SDValue N1 = Addr.getOperand(1);
835     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
836     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
837       // (add n0, c0)
838       Base = N0;
839       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
840       return true;
841     }
842   } else if (Addr.getOpcode() == ISD::SUB) {
843     // sub C, x -> add (sub 0, x), C
844     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
845       int64_t ByteOffset = C->getSExtValue();
846       if (isUInt<16>(ByteOffset)) {
847         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
848 
849         // XXX - This is kind of hacky. Create a dummy sub node so we can check
850         // the known bits in isDSOffsetLegal. We need to emit the selected node
851         // here, so this is thrown away.
852         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
853                                       Zero, Addr.getOperand(1));
854 
855         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
856           MachineSDNode *MachineSub
857             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
858                                      Zero, Addr.getOperand(1));
859 
860           Base = SDValue(MachineSub, 0);
861           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
862           return true;
863         }
864       }
865     }
866   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
867     // If we have a constant address, prefer to put the constant into the
868     // offset. This can save moves to load the constant address since multiple
869     // operations can share the zero base address register, and enables merging
870     // into read2 / write2 instructions.
871 
872     SDLoc DL(Addr);
873 
874     if (isUInt<16>(CAddr->getZExtValue())) {
875       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
876       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
877                                  DL, MVT::i32, Zero);
878       Base = SDValue(MovZero, 0);
879       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
880       return true;
881     }
882   }
883 
884   // default case
885   Base = Addr;
886   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
887   return true;
888 }
889 
890 // TODO: If offset is too big, put low 16-bit into offset.
891 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
892                                                    SDValue &Offset0,
893                                                    SDValue &Offset1) const {
894   SDLoc DL(Addr);
895 
896   if (CurDAG->isBaseWithConstantOffset(Addr)) {
897     SDValue N0 = Addr.getOperand(0);
898     SDValue N1 = Addr.getOperand(1);
899     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
900     unsigned DWordOffset0 = C1->getZExtValue() / 4;
901     unsigned DWordOffset1 = DWordOffset0 + 1;
902     // (add n0, c0)
903     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
904       Base = N0;
905       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
906       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
907       return true;
908     }
909   } else if (Addr.getOpcode() == ISD::SUB) {
910     // sub C, x -> add (sub 0, x), C
911     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
912       unsigned DWordOffset0 = C->getZExtValue() / 4;
913       unsigned DWordOffset1 = DWordOffset0 + 1;
914 
915       if (isUInt<8>(DWordOffset0)) {
916         SDLoc DL(Addr);
917         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
918 
919         // XXX - This is kind of hacky. Create a dummy sub node so we can check
920         // the known bits in isDSOffsetLegal. We need to emit the selected node
921         // here, so this is thrown away.
922         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
923                                       Zero, Addr.getOperand(1));
924 
925         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
926           MachineSDNode *MachineSub
927             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
928                                      Zero, Addr.getOperand(1));
929 
930           Base = SDValue(MachineSub, 0);
931           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
932           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
933           return true;
934         }
935       }
936     }
937   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
938     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
939     unsigned DWordOffset1 = DWordOffset0 + 1;
940     assert(4 * DWordOffset0 == CAddr->getZExtValue());
941 
942     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
943       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
944       MachineSDNode *MovZero
945         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
946                                  DL, MVT::i32, Zero);
947       Base = SDValue(MovZero, 0);
948       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
949       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
950       return true;
951     }
952   }
953 
954   // default case
955 
956   // FIXME: This is broken on SI where we still need to check if the base
957   // pointer is positive here.
958   Base = Addr;
959   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
960   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
961   return true;
962 }
963 
/// \returns true if \p Imm fits in the 12-bit unsigned MUBUF offset field.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return Imm < (1u << 12);
}
967 
968 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
969   return isLegalMUBUFImmOffset(Imm->getZExtValue());
970 }
971 
// Decompose an address into the full MUBUF operand set (pointer, vaddr,
// soffset, immediate offset, and the offen/idxen/addr64 mode bits).
// Returns false only when the subtarget prefers flat instructions; otherwise
// one of the addressing forms below always matches.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already be pinned by the caller; only default them to 0.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Default mode bits: no offen/idxen/addr64, zero soffset. The addr64 forms
  // below overwrite Addr64 when they match.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // Put the constant in the immediate offset field when it fits.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // Constant doesn't fit in 32 bits either; fall through to the plain
    // add / default handling below.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1046 
1047 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1048                                            SDValue &VAddr, SDValue &SOffset,
1049                                            SDValue &Offset, SDValue &GLC,
1050                                            SDValue &SLC, SDValue &TFE) const {
1051   SDValue Ptr, Offen, Idxen, Addr64;
1052 
1053   // addr64 bit was removed for volcanic islands.
1054   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1055     return false;
1056 
1057   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1058               GLC, SLC, TFE))
1059     return false;
1060 
1061   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1062   if (C->getSExtValue()) {
1063     SDLoc DL(Addr);
1064 
1065     const SITargetLowering& Lowering =
1066       *static_cast<const SITargetLowering*>(getTargetLowering());
1067 
1068     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1069     return true;
1070   }
1071 
1072   return false;
1073 }
1074 
1075 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1076                                            SDValue &VAddr, SDValue &SOffset,
1077                                            SDValue &Offset,
1078                                            SDValue &SLC) const {
1079   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1080   SDValue GLC, TFE;
1081 
1082   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1083 }
1084 
1085 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1086   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1087   return PSV && PSV->isStack();
1088 }
1089 
1090 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1091   const MachineFunction &MF = CurDAG->getMachineFunction();
1092   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1093 
1094   if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
1095     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
1096                                               FI->getValueType(0));
1097 
1098     // If we can resolve this to a frame index access, this is relative to the
1099     // frame pointer SGPR.
1100     return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
1101                                                    MVT::i32));
1102   }
1103 
1104   // If we don't know this private access is a local stack object, it needs to
1105   // be relative to the entry point's scratch wave offset register.
1106   return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
1107                                                MVT::i32));
1108 }
1109 
// Match a private (scratch) access in the MUBUF offen form: the address goes
// in VAddr with the scratch rsrc descriptor, plus an soffset register and a
// 12-bit immediate offset. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    // A fully-legal immediate would have matched a simpler pattern first.
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Split the constant: high bits go through a V_MOV into vaddr, and the
    // low 12 bits stay in the immediate offset field.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      // Base goes in vaddr (resolving frame indices), constant in the
      // immediate offset field.
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1162 
1163 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
1164                                                   SDValue Addr,
1165                                                   SDValue &SRsrc,
1166                                                   SDValue &SOffset,
1167                                                   SDValue &Offset) const {
1168   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
1169   if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
1170     return false;
1171 
1172   SDLoc DL(Addr);
1173   MachineFunction &MF = CurDAG->getMachineFunction();
1174   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1175 
1176   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1177 
1178   const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
1179   unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
1180     Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();
1181 
1182   // FIXME: Get from MachinePointerInfo? We should only be using the frame
1183   // offset if we know this is in a call sequence.
1184   SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
1185 
1186   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1187   return true;
1188 }
1189 
1190 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1191                                            SDValue &SOffset, SDValue &Offset,
1192                                            SDValue &GLC, SDValue &SLC,
1193                                            SDValue &TFE) const {
1194   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1195   const SIInstrInfo *TII =
1196     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1197 
1198   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1199               GLC, SLC, TFE))
1200     return false;
1201 
1202   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1203       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1204       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1205     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1206                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1207     SDLoc DL(Addr);
1208 
1209     const SITargetLowering& Lowering =
1210       *static_cast<const SITargetLowering*>(getTargetLowering());
1211 
1212     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1213     return true;
1214   }
1215   return false;
1216 }
1217 
1218 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1219                                            SDValue &Soffset, SDValue &Offset
1220                                            ) const {
1221   SDValue GLC, SLC, TFE;
1222 
1223   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1224 }
1225 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1226                                            SDValue &Soffset, SDValue &Offset,
1227                                            SDValue &SLC) const {
1228   SDValue GLC, TFE;
1229 
1230   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1231 }
1232 
// Split a constant buffer offset into a 12-bit immediate (ImmOffset) plus an
// SOffset carrying any overflow. Returns false on SI/CI when a non-zero
// SOffset would be needed (hardware bug, see below).
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  // 0..4095 fits entirely in the immediate field; nothing to split.
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Overflow values up to 64 can be encoded as inline constants; larger
  // values need a materializing S_MOV_B32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1276 
1277 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1278                                                     SDValue &SOffset,
1279                                                     SDValue &ImmOffset) const {
1280   SDLoc DL(Offset);
1281 
1282   if (!isa<ConstantSDNode>(Offset))
1283     return false;
1284 
1285   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1286 }
1287 
// Match a buffer-intrinsic offset into the voffset form. Always succeeds
// except for constant offsets that the no-voffset pattern should handle
// instead.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    // On > CI, or when SelectMUBUFConstant can split the constant, reject so
    // the constant-only pattern matches instead.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    // Only fold non-negative constants whose split succeeds; the variable
    // part goes in voffset.
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Fallback: everything in voffset, zero soffset and immediate.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1321 
1322 template <bool IsSigned>
1323 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
1324                                           SDValue &VAddr,
1325                                           SDValue &Offset,
1326                                           SDValue &SLC) const {
1327   int64_t OffsetVal = 0;
1328 
1329   if (Subtarget->hasFlatInstOffsets() &&
1330       CurDAG->isBaseWithConstantOffset(Addr)) {
1331     SDValue N0 = Addr.getOperand(0);
1332     SDValue N1 = Addr.getOperand(1);
1333     int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1334 
1335     if ((IsSigned && isInt<13>(COffsetVal)) ||
1336         (!IsSigned && isUInt<12>(COffsetVal))) {
1337       Addr = N0;
1338       OffsetVal = COffsetVal;
1339     }
1340   }
1341 
1342   VAddr = Addr;
1343   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
1344   SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1345 
1346   return true;
1347 }
1348 
1349 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
1350                                           SDValue &VAddr,
1351                                           SDValue &Offset,
1352                                           SDValue &SLC) const {
1353   return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
1354 }
1355 
1356 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
1357                                           SDValue &VAddr,
1358                                           SDValue &Offset,
1359                                           SDValue &SLC) const {
1360   return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
1361 }
1362 
1363 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1364                                           SDValue &Offset, bool &Imm) const {
1365 
1366   // FIXME: Handle non-constant offsets.
1367   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1368   if (!C)
1369     return false;
1370 
1371   SDLoc SL(ByteOffsetNode);
1372   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1373   int64_t ByteOffset = C->getSExtValue();
1374   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1375 
1376   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1377     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1378     Imm = true;
1379     return true;
1380   }
1381 
1382   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1383     return false;
1384 
1385   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1386     // 32-bit Immediates are supported on Sea Islands.
1387     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1388   } else {
1389     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1390     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1391                                             C32Bit), 0);
1392   }
1393   Imm = false;
1394   return true;
1395 }
1396 
1397 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1398                                      SDValue &Offset, bool &Imm) const {
1399   SDLoc SL(Addr);
1400   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1401     SDValue N0 = Addr.getOperand(0);
1402     SDValue N1 = Addr.getOperand(1);
1403 
1404     if (SelectSMRDOffset(N1, Offset, Imm)) {
1405       SBase = N0;
1406       return true;
1407     }
1408   }
1409   SBase = Addr;
1410   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1411   Imm = true;
1412   return true;
1413 }
1414 
1415 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1416                                        SDValue &Offset) const {
1417   bool Imm;
1418   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1419 }
1420 
1421 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1422                                          SDValue &Offset) const {
1423 
1424   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1425     return false;
1426 
1427   bool Imm;
1428   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1429     return false;
1430 
1431   return !Imm && isa<ConstantSDNode>(Offset);
1432 }
1433 
1434 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1435                                         SDValue &Offset) const {
1436   bool Imm;
1437   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1438          !isa<ConstantSDNode>(Offset);
1439 }
1440 
1441 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1442                                              SDValue &Offset) const {
1443   bool Imm;
1444   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1445 }
1446 
1447 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1448                                                SDValue &Offset) const {
1449   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1450     return false;
1451 
1452   bool Imm;
1453   if (!SelectSMRDOffset(Addr, Offset, Imm))
1454     return false;
1455 
1456   return !Imm && isa<ConstantSDNode>(Offset);
1457 }
1458 
1459 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1460                                               SDValue &Offset) const {
1461   bool Imm;
1462   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1463          !isa<ConstantSDNode>(Offset);
1464 }
1465 
1466 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1467                                             SDValue &Base,
1468                                             SDValue &Offset) const {
1469   SDLoc DL(Index);
1470 
1471   if (CurDAG->isBaseWithConstantOffset(Index)) {
1472     SDValue N0 = Index.getOperand(0);
1473     SDValue N1 = Index.getOperand(1);
1474     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1475 
1476     // (add n0, c0)
1477     Base = N0;
1478     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1479     return true;
1480   }
1481 
1482   if (isa<ConstantSDNode>(Index))
1483     return false;
1484 
1485   Base = Index;
1486   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1487   return true;
1488 }
1489 
1490 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1491                                      SDValue Val, uint32_t Offset,
1492                                      uint32_t Width) {
1493   // Transformation function, pack the offset and width of a BFE into
1494   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1495   // source, bits [5:0] contain the offset and bits [22:16] the width.
1496   uint32_t PackedVal = Offset | (Width << 16);
1497   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1498 
1499   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1500 }
1501 
1502 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1503   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1504   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1505   // Predicate: 0 < b <= c < 32
1506 
1507   const SDValue &Shl = N->getOperand(0);
1508   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1509   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1510 
1511   if (B && C) {
1512     uint32_t BVal = B->getZExtValue();
1513     uint32_t CVal = C->getZExtValue();
1514 
1515     if (0 < BVal && BVal <= CVal && CVal < 32) {
1516       bool Signed = N->getOpcode() == ISD::SRA;
1517       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1518 
1519       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1520                               32 - CVal));
1521       return;
1522     }
1523   }
1524   SelectCode(N);
1525 }
1526 
// Try to fold shift/mask/sign-extend idioms into a single S_BFE bitfield
// extract; anything that doesn't match falls through to SelectCode.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        // The mask must be a contiguous run of low bits; its popcount is the
        // extracted field width.
        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask first: only the bits surviving the shift matter.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl, srl) pairs are handled by the shared shift-pair matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl, sra) pairs become signed BFEs via the shared matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sext_inreg VT gives the field width; the shift amount the offset.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // Fall back to the tablegen-generated patterns.
  SelectCode(N);
}
1602 
1603 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1604   assert(N->getOpcode() == ISD::BRCOND);
1605   if (!N->hasOneUse())
1606     return false;
1607 
1608   SDValue Cond = N->getOperand(1);
1609   if (Cond.getOpcode() == ISD::CopyToReg)
1610     Cond = Cond.getOperand(2);
1611 
1612   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1613     return false;
1614 
1615   MVT VT = Cond.getOperand(0).getSimpleValueType();
1616   if (VT == MVT::i32)
1617     return true;
1618 
1619   if (VT == MVT::i64) {
1620     auto ST = static_cast<const SISubtarget *>(Subtarget);
1621 
1622     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1623     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1624   }
1625 
1626   return false;
1627 }
1628 
1629 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1630   SDValue Cond = N->getOperand(1);
1631 
1632   if (Cond.isUndef()) {
1633     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1634                          N->getOperand(2), N->getOperand(0));
1635     return;
1636   }
1637 
1638   if (isCBranchSCC(N)) {
1639     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1640     SelectCode(N);
1641     return;
1642   }
1643 
1644   SDLoc SL(N);
1645 
1646   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
1647   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1648                        N->getOperand(2), // Basic Block
1649                        VCC.getValue(0));
1650 }
1651 
1652 void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
1653   MVT VT = N->getSimpleValueType(0);
1654   if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
1655     SelectCode(N);
1656     return;
1657   }
1658 
1659   SDValue Src0 = N->getOperand(0);
1660   SDValue Src1 = N->getOperand(1);
1661   SDValue Src2 = N->getOperand(2);
1662   unsigned Src0Mods, Src1Mods, Src2Mods;
1663 
1664   // Avoid using v_mad_mix_f32 unless there is actually an operand using the
1665   // conversion from f16.
1666   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
1667   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
1668   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
1669 
1670   assert(!Subtarget->hasFP32Denormals() &&
1671          "fmad selected with denormals enabled");
1672   // TODO: We can select this with f32 denormals enabled if all the sources are
1673   // converted from f16 (in which case fmad isn't legal).
1674 
1675   if (Sel0 || Sel1 || Sel2) {
1676     // For dummy operands.
1677     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
1678     SDValue Ops[] = {
1679       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
1680       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
1681       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
1682       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
1683       Zero, Zero
1684     };
1685 
1686     CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
1687   } else {
1688     SelectCode(N);
1689   }
1690 }
1691 
1692 // This is here because there isn't a way to use the generated sub0_sub1 as the
1693 // subreg index to EXTRACT_SUBREG in tablegen.
1694 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1695   MemSDNode *Mem = cast<MemSDNode>(N);
1696   unsigned AS = Mem->getAddressSpace();
1697   if (AS == AMDGPUASI.FLAT_ADDRESS) {
1698     SelectCode(N);
1699     return;
1700   }
1701 
1702   MVT VT = N->getSimpleValueType(0);
1703   bool Is32 = (VT == MVT::i32);
1704   SDLoc SL(N);
1705 
1706   MachineSDNode *CmpSwap = nullptr;
1707   if (Subtarget->hasAddr64()) {
1708     SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
1709 
1710     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1711       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
1712         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
1713       SDValue CmpVal = Mem->getOperand(2);
1714 
1715       // XXX - Do we care about glue operands?
1716 
1717       SDValue Ops[] = {
1718         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1719       };
1720 
1721       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1722     }
1723   }
1724 
1725   if (!CmpSwap) {
1726     SDValue SRsrc, SOffset, Offset, SLC;
1727     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1728       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
1729         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
1730 
1731       SDValue CmpVal = Mem->getOperand(2);
1732       SDValue Ops[] = {
1733         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1734       };
1735 
1736       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1737     }
1738   }
1739 
1740   if (!CmpSwap) {
1741     SelectCode(N);
1742     return;
1743   }
1744 
1745   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1746   *MMOs = Mem->getMemOperand();
1747   CmpSwap->setMemRefs(MMOs, MMOs + 1);
1748 
1749   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1750   SDValue Extract
1751     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1752 
1753   ReplaceUses(SDValue(N, 0), Extract);
1754   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1755   CurDAG->RemoveDeadNode(N);
1756 }
1757 
1758 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1759                                             unsigned &Mods) const {
1760   Mods = 0;
1761   Src = In;
1762 
1763   if (Src.getOpcode() == ISD::FNEG) {
1764     Mods |= SISrcMods::NEG;
1765     Src = Src.getOperand(0);
1766   }
1767 
1768   if (Src.getOpcode() == ISD::FABS) {
1769     Mods |= SISrcMods::ABS;
1770     Src = Src.getOperand(0);
1771   }
1772 
1773   return true;
1774 }
1775 
1776 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1777                                         SDValue &SrcMods) const {
1778   unsigned Mods;
1779   if (SelectVOP3ModsImpl(In, Src, Mods)) {
1780     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1781     return true;
1782   }
1783 
1784   return false;
1785 }
1786 
1787 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1788                                              SDValue &SrcMods) const {
1789   SelectVOP3Mods(In, Src, SrcMods);
1790   return isNoNanSrc(Src);
1791 }
1792 
1793 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1794   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1795     return false;
1796 
1797   Src = In;
1798   return true;
1799 }
1800 
1801 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1802                                          SDValue &SrcMods, SDValue &Clamp,
1803                                          SDValue &Omod) const {
1804   SDLoc DL(In);
1805   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1806   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1807 
1808   return SelectVOP3Mods(In, Src, SrcMods);
1809 }
1810 
1811 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1812                                                    SDValue &SrcMods,
1813                                                    SDValue &Clamp,
1814                                                    SDValue &Omod) const {
1815   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1816   return SelectVOP3Mods(In, Src, SrcMods);
1817 }
1818 
1819 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1820                                          SDValue &Clamp, SDValue &Omod) const {
1821   Src = In;
1822 
1823   SDLoc DL(In);
1824   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1825   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1826 
1827   return true;
1828 }
1829 
1830 static SDValue stripBitcast(SDValue Val) {
1831   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1832 }
1833 
1834 // Figure out if this is really an extract of the high 16-bits of a dword.
1835 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1836   In = stripBitcast(In);
1837   if (In.getOpcode() != ISD::TRUNCATE)
1838     return false;
1839 
1840   SDValue Srl = In.getOperand(0);
1841   if (Srl.getOpcode() == ISD::SRL) {
1842     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1843       if (ShiftAmt->getZExtValue() == 16) {
1844         Out = stripBitcast(Srl.getOperand(0));
1845         return true;
1846       }
1847     }
1848   }
1849 
1850   return false;
1851 }
1852 
1853 // Look through operations that obscure just looking at the low 16-bits of the
1854 // same register.
1855 static SDValue stripExtractLoElt(SDValue In) {
1856   if (In.getOpcode() == ISD::TRUNCATE) {
1857     SDValue Src = In.getOperand(0);
1858     if (Src.getValueType().getSizeInBits() == 32)
1859       return stripBitcast(Src);
1860   }
1861 
1862   return In;
1863 }
1864 
// Match packed (VOP3P) source modifiers: per-half negation and op_sel bits.
// Always succeeds; Src and SrcMods receive the matched source and mods.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    // A whole-vector fneg toggles negation of both halves (XOR so a later
    // per-element fneg can cancel it).
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Save the mods matched so far; restored below if the build_vector does
    // not collapse to a single scalar source.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // Elements that are really high-half extracts select the high half of
    // their source register via op_sel.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    // build_vector did not simplify to one scalar; discard per-element mods.
    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}
1918 
1919 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1920                                           SDValue &SrcMods,
1921                                           SDValue &Clamp) const {
1922   SDLoc SL(In);
1923 
1924   // FIXME: Handle clamp and op_sel
1925   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1926 
1927   return SelectVOP3PMods(In, Src, SrcMods);
1928 }
1929 
1930 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1931                                          SDValue &SrcMods) const {
1932   Src = In;
1933   // FIXME: Handle op_sel
1934   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1935   return true;
1936 }
1937 
1938 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1939                                           SDValue &SrcMods,
1940                                           SDValue &Clamp) const {
1941   SDLoc SL(In);
1942 
1943   // FIXME: Handle clamp
1944   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1945 
1946   return SelectVOP3OpSel(In, Src, SrcMods);
1947 }
1948 
1949 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
1950                                              SDValue &SrcMods) const {
1951   // FIXME: Handle op_sel
1952   return SelectVOP3Mods(In, Src, SrcMods);
1953 }
1954 
1955 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1956                                               SDValue &SrcMods,
1957                                               SDValue &Clamp) const {
1958   SDLoc SL(In);
1959 
1960   // FIXME: Handle clamp
1961   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1962 
1963   return SelectVOP3OpSelMods(In, Src, SrcMods);
1964 }
1965 
1966 // The return value is not whether the match is possible (which it always is),
1967 // but whether or not it a conversion is really used.
1968 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
1969                                                    unsigned &Mods) const {
1970   Mods = 0;
1971   SelectVOP3ModsImpl(In, Src, Mods);
1972 
1973   if (Src.getOpcode() == ISD::FP_EXTEND) {
1974     Src = Src.getOperand(0);
1975     assert(Src.getValueType() == MVT::f16);
1976     Src = stripBitcast(Src);
1977 
1978     // op_sel/op_sel_hi decide the source type and source.
1979     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
1980     // If the sources's op_sel is set, it picks the high half of the source
1981     // register.
1982 
1983     Mods |= SISrcMods::OP_SEL_1;
1984     if (isExtractHiElt(Src, Src))
1985       Mods |= SISrcMods::OP_SEL_0;
1986 
1987     return true;
1988   }
1989 
1990   return false;
1991 }
1992 
1993 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1994   const AMDGPUTargetLowering& Lowering =
1995     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1996   bool IsModified = false;
1997   do {
1998     IsModified = false;
1999     // Go over all selected nodes and try to fold them a bit more
2000     for (SDNode &Node : CurDAG->allnodes()) {
2001       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
2002       if (!MachineNode)
2003         continue;
2004 
2005       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
2006       if (ResNode != &Node) {
2007         ReplaceUses(&Node, ResNode);
2008         IsModified = true;
2009       }
2010     }
2011     CurDAG->RemoveDeadNodes();
2012   } while (IsModified);
2013 }
2014 
2015 void R600DAGToDAGISel::Select(SDNode *N) {
2016   unsigned int Opc = N->getOpcode();
2017   if (N->isMachineOpcode()) {
2018     N->setNodeId(-1);
2019     return;   // Already selected.
2020   }
2021 
2022   switch (Opc) {
2023   default: break;
2024   case AMDGPUISD::BUILD_VERTICAL_VECTOR:
2025   case ISD::SCALAR_TO_VECTOR:
2026   case ISD::BUILD_VECTOR: {
2027     EVT VT = N->getValueType(0);
2028     unsigned NumVectorElts = VT.getVectorNumElements();
2029     unsigned RegClassID;
2030     // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
2031     // that adds a 128 bits reg copy when going through TwoAddressInstructions
2032     // pass. We want to avoid 128 bits copies as much as possible because they
2033     // can't be bundled by our scheduler.
2034     switch(NumVectorElts) {
2035     case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
2036     case 4:
2037       if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
2038         RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
2039       else
2040         RegClassID = AMDGPU::R600_Reg128RegClassID;
2041       break;
2042     default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
2043     }
2044     SelectBuildVector(N, RegClassID);
2045     return;
2046   }
2047   }
2048 
2049   SelectCode(N);
2050 }
2051 
2052 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2053                                           SDValue &Offset) {
2054   ConstantSDNode *C;
2055   SDLoc DL(Addr);
2056 
2057   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2058     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2059     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2060   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2061              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2062     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2063     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2064   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2065             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2066     Base = Addr.getOperand(0);
2067     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2068   } else {
2069     Base = Addr;
2070     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2071   }
2072 
2073   return true;
2074 }
2075 
2076 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2077                                           SDValue &Offset) {
2078   ConstantSDNode *IMMOffset;
2079 
2080   if (Addr.getOpcode() == ISD::ADD
2081       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2082       && isInt<16>(IMMOffset->getZExtValue())) {
2083 
2084       Base = Addr.getOperand(0);
2085       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2086                                          MVT::i32);
2087       return true;
2088   // If the pointer address is constant, we can move it to the offset field.
2089   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2090              && isInt<16>(IMMOffset->getZExtValue())) {
2091     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2092                                   SDLoc(CurDAG->getEntryNode()),
2093                                   AMDGPU::ZERO, MVT::i32);
2094     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2095                                        MVT::i32);
2096     return true;
2097   }
2098 
2099   // Default case, no offset
2100   Base = Addr;
2101   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2102   return true;
2103 }
2104