1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUArgumentUsageInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUInstrInfo.h"
19 #include "AMDGPURegisterInfo.h"
20 #include "AMDGPUSubtarget.h"
21 #include "AMDGPUTargetMachine.h"
22 #include "SIDefines.h"
23 #include "SIISelLowering.h"
24 #include "SIInstrInfo.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIRegisterInfo.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/CodeGen/FunctionLoweringInfo.h"
32 #include "llvm/CodeGen/ISDOpcodes.h"
33 #include "llvm/CodeGen/MachineFunction.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/MachineValueType.h"
36 #include "llvm/CodeGen/SelectionDAG.h"
37 #include "llvm/CodeGen/SelectionDAGISel.h"
38 #include "llvm/CodeGen/SelectionDAGNodes.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/MC/MCInstrDesc.h"
43 #include "llvm/Support/Casting.h"
44 #include "llvm/Support/CodeGen.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/MathExtras.h"
47 #include <cassert>
48 #include <cstdint>
49 #include <new>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 namespace llvm {
55 
56 class R600InstrInfo;
57 
58 } // end namespace llvm
59 
60 //===----------------------------------------------------------------------===//
61 // Instruction Selector Implementation
62 //===----------------------------------------------------------------------===//
63 
64 namespace {
65 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space numbering for the current target.
  AMDGPUAS AMDGPUASI;
  // Snapshot of AMDGPUTargetMachine::EnableLateStructurizeCFG taken at
  // construction time.
  bool EnableLateStructurizeCFG;

public:
  // NOTE(review): TM defaults to nullptr but is unconditionally dereferenced
  // in the initializer list and body — callers must pass a valid
  // TargetMachine; confirm whether the default is ever used.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  // Declare the dependency on the kernel-argument usage analysis so the pass
  // manager runs it before instruction selection.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  // R600 operand-folding helpers (defined in the R600 selector sources).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local memory) addressing-mode complex patterns.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode complex patterns.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing-mode complex patterns.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory read) addressing-mode complex patterns.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs/clamp/omod/op_sel) complex patterns.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  // Custom selection routines invoked from Select() for nodes the generated
  // matcher cannot handle.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
223 
// R600-family specialization of the selector; overrides the addressing-mode
// selectors that differ between R600 and GCN (Southern Islands+) targets.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};
236 
237 }  // end anonymous namespace
238 
// Register the pass with the pass registry and record its dependency on
// AMDGPUArgumentUsageInfo so the analysis is scheduled before selection.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
244 
245 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
246 // DAG, ready for instruction scheduling.
247 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
248                                         CodeGenOpt::Level OptLevel) {
249   return new AMDGPUDAGToDAGISel(TM, OptLevel);
250 }
251 
252 /// \brief This pass converts a legalized DAG into a R600-specific
253 // DAG, ready for instruction scheduling.
254 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
255                                       CodeGenOpt::Level OptLevel) {
256   return new R600DAGToDAGISel(TM, OptLevel);
257 }
258 
259 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
260   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
261   return SelectionDAGISel::runOnMachineFunction(MF);
262 }
263 
264 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
265   if (TM.Options.NoNaNsFPMath)
266     return true;
267 
268   // TODO: Move into isKnownNeverNaN
269   if (N->getFlags().isDefined())
270     return N->getFlags().hasNoNaNs();
271 
272   return CurDAG->isKnownNeverNaN(N);
273 }
274 
275 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
276   const SIInstrInfo *TII
277     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
278 
279   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
280     return TII->isInlineConstant(C->getAPIntValue());
281 
282   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
283     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
284 
285   return false;
286 }
287 
288 /// \brief Determine the register class for \p OpNo
289 /// \returns The register class of the virtual register that will be used for
290 /// the given operand number \OpNo or NULL if the register class cannot be
291 /// determined.
292 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
293                                                           unsigned OpNo) const {
294   if (!N->isMachineOpcode()) {
295     if (N->getOpcode() == ISD::CopyToReg) {
296       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
297       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
298         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
299         return MRI.getRegClass(Reg);
300       }
301 
302       const SIRegisterInfo *TRI
303         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
304       return TRI->getPhysRegClass(Reg);
305     }
306 
307     return nullptr;
308   }
309 
310   switch (N->getMachineOpcode()) {
311   default: {
312     const MCInstrDesc &Desc =
313         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
314     unsigned OpIdx = Desc.getNumDefs() + OpNo;
315     if (OpIdx >= Desc.getNumOperands())
316       return nullptr;
317     int RegClass = Desc.OpInfo[OpIdx].RegClass;
318     if (RegClass == -1)
319       return nullptr;
320 
321     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
322   }
323   case AMDGPU::REG_SEQUENCE: {
324     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
325     const TargetRegisterClass *SuperRC =
326         Subtarget->getRegisterInfo()->getRegClass(RCID);
327 
328     SDValue SubRegOp = N->getOperand(OpNo + 1);
329     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
330     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
331                                                               SubRegIdx);
332   }
333   }
334 }
335 
336 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
337   if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
338     return N;
339 
340   const SITargetLowering& Lowering =
341       *static_cast<const SITargetLowering*>(getTargetLowering());
342 
343   // Write max value to m0 before each load operation
344 
345   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
346                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
347 
348   SDValue Glue = M0.getValue(1);
349 
350   SmallVector <SDValue, 8> Ops;
351   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
352      Ops.push_back(N->getOperand(i));
353   }
354   Ops.push_back(Glue);
355   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
356 
357   return N;
358 }
359 
360 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
361   switch (NumVectorElts) {
362   case 1:
363     return AMDGPU::SReg_32_XM0RegClassID;
364   case 2:
365     return AMDGPU::SReg_64RegClassID;
366   case 4:
367     return AMDGPU::SReg_128RegClassID;
368   case 8:
369     return AMDGPU::SReg_256RegClassID;
370   case 16:
371     return AMDGPU::SReg_512RegClassID;
372   }
373 
374   llvm_unreachable("invalid vector size");
375 }
376 
377 static bool getConstantValue(SDValue N, uint32_t &Out) {
378   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
379     Out = C->getAPIntValue().getZExtValue();
380     return true;
381   }
382 
383   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
384     Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
385     return true;
386   }
387 
388   return false;
389 }
390 
391 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
392   EVT VT = N->getValueType(0);
393   unsigned NumVectorElts = VT.getVectorNumElements();
394   EVT EltVT = VT.getVectorElementType();
395   const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
396   SDLoc DL(N);
397   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
398 
399   if (NumVectorElts == 1) {
400     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
401                          RegClass);
402     return;
403   }
404 
405   assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
406                                   "supported yet");
407   // 16 = Max Num Vector Elements
408   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
409   // 1 = Vector Register Class
410   SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
411 
412   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
413   bool IsRegSeq = true;
414   unsigned NOps = N->getNumOperands();
415   for (unsigned i = 0; i < NOps; i++) {
416     // XXX: Why is this here?
417     if (isa<RegisterSDNode>(N->getOperand(i))) {
418       IsRegSeq = false;
419       break;
420     }
421     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
422     RegSeqArgs[1 + (2 * i) + 1] =
423             CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
424                                       MVT::i32);
425   }
426   if (NOps != NumVectorElts) {
427     // Fill in the missing undef elements if this was a scalar_to_vector.
428     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
429     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
430                                                    DL, EltVT);
431     for (unsigned i = NOps; i < NumVectorElts; ++i) {
432       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
433       RegSeqArgs[1 + (2 * i) + 1] =
434         CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
435     }
436   }
437 
438   if (!IsRegSeq)
439     SelectCode(N);
440   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
441 }
442 
// Top-level selection entry point: handles the node kinds that need custom
// AMDGPU lowering and defers everything else to the generated matcher
// (SelectCode) at the bottom.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics (and the AMDGPU inc/dec pseudo-atomics) touching local memory
  // need M0 initialized; glueCopyToM0 is a no-op for other address spaces.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    // Packed 16-bit vectors built from two constants can be materialized as
    // a single 32-bit S_MOV_B32 of the combined bit pattern.
    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine two halves into a wider register with a REG_SEQUENCE
    // (i64 from two i32 subregs, i128 from two i64 subreg pairs).
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit constants that are not inline immediates are materialized as two
    // 32-bit S_MOV_B32s combined with a REG_SEQUENCE.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local loads/stores need M0 set up; then fall through to the matcher.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    // Let the target lowering legalize target-independent nodes (e.g. insert
    // required copies) before the matcher runs.
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be foldable into a scalar bitfield extract.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
    SelectFMAD(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
625 
626 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
627   if (!N->readMem())
628     return false;
629   if (CbId == -1)
630     return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
631 
632   return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
633 }
634 
635 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
636   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
637   const Instruction *Term = BB->getTerminator();
638   return Term->getMetadata("amdgpu.uniform") ||
639          Term->getMetadata("structurizecfg.uniform");
640 }
641 
// Human-readable pass name (shown e.g. in -debug-pass / timing output).
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
645 
646 //===----------------------------------------------------------------------===//
647 // Complex Patterns
648 //===----------------------------------------------------------------------===//
649 
650 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
651                                                          SDValue& IntPtr) {
652   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
653     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
654                                        true);
655     return true;
656   }
657   return false;
658 }
659 
660 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
661     SDValue& BaseReg, SDValue &Offset) {
662   if (!isa<ConstantSDNode>(Addr)) {
663     BaseReg = Addr;
664     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
665     return true;
666   }
667   return false;
668 }
669 
// Base implementation never matches; R600DAGToDAGISel overrides this with the
// real VTX_READ addressing-mode selection.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
674 
675 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
676                                             SDValue &Offset) {
677   ConstantSDNode *C;
678   SDLoc DL(Addr);
679 
680   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
681     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
682     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
683   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
684              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
685     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
686     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
687   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
688             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
689     Base = Addr.getOperand(0);
690     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
691   } else {
692     Base = Addr;
693     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
694   }
695 
696   return true;
697 }
698 
// Expand a 64-bit integer add/sub into two 32-bit scalar ops chained through
// the carry bit (S_ADD_U32 + S_ADDC_U32 or S_SUB_U32 + S_SUBB_U32), then
// recombine the halves with a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; ADDC/SUBC/ADDE/SUBE all produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both 64-bit operands into low (sub0) and high (sub1) halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // Each half produces (i32 result, glue carry).
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: use the carry-consuming opcode (with the incoming carry as a
  // third operand) only for ADDE/SUBE.
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the carry (glue) produced by the low half.
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  // Reassemble the i64 result from the two halves.
  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}
763 
764 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
765   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
766   // carry out despite the _i32 name. These were renamed in VI to _U32.
767   // FIXME: We should probably rename the opcodes here.
768   unsigned Opc = N->getOpcode() == ISD::UADDO ?
769     AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
770 
771   CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
772                        { N->getOperand(0), N->getOperand(1) });
773 }
774 
// Select AMDGPUISD::FMA_W_CHAIN into V_FMA_F32, expanding each source into
// the (src_modifiers, src) pairs the VOP3 encoding expects.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  // Node operand 0 is the chain; operands 1-3 are the three FMA sources.
  // src0 also carries the clamp (Ops[6]) and omod (Ops[7]) operands.
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  // Chain, then node operand 4 (NOTE(review): presumably incoming glue —
  // confirm against the FMA_W_CHAIN node definition).
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}
788 
// Select AMDGPUISD::FMUL_W_CHAIN into V_MUL_F32_e64, expanding each source
// into the (src_modifiers, src) pairs the VOP3 encoding expects.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  // Node operand 0 is the chain; operands 1-2 are the two sources. src0 also
  // carries the clamp (Ops[4]) and omod (Ops[5]) operands.
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  // Chain, then node operand 3 (NOTE(review): presumably incoming glue —
  // confirm against the FMUL_W_CHAIN node definition).
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
801 
802 // We need to handle this here because tablegen doesn't support matching
803 // instructions with multiple outputs.
804 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
805   SDLoc SL(N);
806   EVT VT = N->getValueType(0);
807 
808   assert(VT == MVT::f32 || VT == MVT::f64);
809 
810   unsigned Opc
811     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
812 
813   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
814   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
815 }
816 
817 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
818                                          unsigned OffsetBits) const {
819   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
820       (OffsetBits == 8 && !isUInt<8>(Offset)))
821     return false;
822 
823   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
824       Subtarget->unsafeDSOffsetFoldingEnabled())
825     return true;
826 
827   // On Southern Islands instruction with a negative base value and an offset
828   // don't seem to work.
829   return CurDAG->SignBitIsZero(Base);
830 }
831 
// Match a single-address DS operand: a base register plus a 16-bit unsigned
// byte offset (emitted as an i16 target constant).  Always succeeds,
// falling back to Base = Addr with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Materialize the negated base as an already-selected machine node.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
892 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a two-offset DS operand (read2/write2 style): a base register plus
// two consecutive 8-bit offsets measured in dwords (byte offset / 4), each
// emitted as an i8 target constant.  Always succeeds.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are in dword units; the second slot is the next dword.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Materialize the negated base as an already-selected machine node.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // Constant addresses handled here must be 4-byte aligned.
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      // Use a shared zero base register so constant addresses can be merged.
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
966 
// MUBUF immediate offsets are unsigned 12-bit values.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return Imm < (1u << 12);
}
970 
971 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
972   return isLegalMUBUFImmOffset(Imm->getZExtValue());
973 }
974 
// Decompose an address into the components of a MUBUF access.  On success
// the outputs describe either an addr64-style access (Ptr + VAddr register
// pair) or a plain offset access (VAddr = 0 with the pointer in Ptr).
// GLC and SLC are given default-0 values only when the caller has not
// pre-set them; TFE, Idxen and Offen are always forced to 0 here.
// Fails only when the subtarget prefers flat instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Respect GLC/SLC values the caller already chose.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // The constant fits the 12-bit immediate offset field directly.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1049 
// Select the addr64 form of a MUBUF access: the pointer is wrapped into an
// addr64 rsrc and the variable part goes in VAddr.  Fails on VI+ (which has
// no addr64 bit) or when SelectMUBUF did not produce an addr64-style
// decomposition.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only usable if SelectMUBUF actually chose the addr64 form.
  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}
1077 
1078 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1079                                            SDValue &VAddr, SDValue &SOffset,
1080                                            SDValue &Offset,
1081                                            SDValue &SLC) const {
1082   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1083   SDValue GLC, TFE;
1084 
1085   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1086 }
1087 
1088 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
1089   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
1090   return PSV && PSV->isStack();
1091 }
1092 
// Split a private (scratch) address into (address, scratch-offset register).
// A frame index becomes a target frame index paired with the frame offset
// register; anything else is paired with the scratch wave offset register.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}
1112 
// Select a scratch (private) access in the offen form: a VGPR address plus
// an SGPR soffset and a 12-bit immediate offset, using the function's
// scratch rsrc.  Always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Split the constant: the part above 12 bits is materialized into a VGPR,
    // the low 12 bits go in the immediate offset.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1165 
// Select a scratch access with no VGPR address: only matches a constant
// address that fits entirely in the 12-bit immediate offset field.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  // Stack-relative accesses (call argument area) use the stack pointer;
  // everything else uses the scratch wave offset register.
  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}
1192 
// Select the plain-offset MUBUF form (no vaddr, no idxen, no addr64): the
// pointer is folded into a freshly built rsrc and the offset fields come
// from SelectMUBUF.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only applicable when none of the register-addressing modes were chosen.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Default data format with an all-ones (unbounded) size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
1220 
1221 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1222                                            SDValue &Soffset, SDValue &Offset
1223                                            ) const {
1224   SDValue GLC, SLC, TFE;
1225 
1226   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1227 }
1228 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1229                                            SDValue &Soffset, SDValue &Offset,
1230                                            SDValue &SLC) const {
1231   SDValue GLC, TFE;
1232 
1233   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1234 }
1235 
// Split a constant byte offset into a legal 12-bit immediate offset plus a
// remainder in SOffset.  Fails on SI/CI when a nonzero SOffset would be
// required (hardware bug, see below).
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows fit the SGPR inline-constant range; larger values need a
  // materializing S_MOV_B32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1285 
1286 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1287                                                     SDValue &SOffset,
1288                                                     SDValue &ImmOffset) const {
1289   SDLoc DL(Offset);
1290 
1291   if (!isa<ConstantSDNode>(Offset))
1292     return false;
1293 
1294   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1295 }
1296 
// Select a MUBUF intrinsic offset operand into soffset/immoffset plus a
// VGPR voffset.  Rejects constant offsets that the voffset-less pattern
// handles, except on <= CI where a voffset works around a hardware bug.
// Otherwise always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    // (add n0, c1): fold the non-negative constant part into soffset/imm and
    // keep the variable part in voffset.
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Default: the whole value goes in voffset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1330 
// Match a FLAT address with an optional immediate offset.  The offset is
// only folded on subtargets with flat instruction offsets: 13-bit signed
// when IsSigned, 12-bit unsigned otherwise.  Always succeeds; SLC defaults
// to 0.
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      // Fold the constant into the offset field and address from the base.
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}
1357 
// Flat atomic addressing: unsigned (12-bit) offset variant.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}
1364 
// Flat atomic addressing: signed (13-bit) offset variant.
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}
1371 
1372 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1373                                           SDValue &Offset, bool &Imm) const {
1374 
1375   // FIXME: Handle non-constant offsets.
1376   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1377   if (!C)
1378     return false;
1379 
1380   SDLoc SL(ByteOffsetNode);
1381   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1382   int64_t ByteOffset = C->getSExtValue();
1383   int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
1384 
1385   if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
1386     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1387     Imm = true;
1388     return true;
1389   }
1390 
1391   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1392     return false;
1393 
1394   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1395     // 32-bit Immediates are supported on Sea Islands.
1396     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1397   } else {
1398     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1399     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1400                                             C32Bit), 0);
1401   }
1402   Imm = false;
1403   return true;
1404 }
1405 
1406 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1407                                      SDValue &Offset, bool &Imm) const {
1408   SDLoc SL(Addr);
1409   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1410     SDValue N0 = Addr.getOperand(0);
1411     SDValue N1 = Addr.getOperand(1);
1412 
1413     if (SelectSMRDOffset(N1, Offset, Imm)) {
1414       SBase = N0;
1415       return true;
1416     }
1417   }
1418   SBase = Addr;
1419   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1420   Imm = true;
1421   return true;
1422 }
1423 
1424 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1425                                        SDValue &Offset) const {
1426   bool Imm;
1427   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1428 }
1429 
1430 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1431                                          SDValue &Offset) const {
1432 
1433   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1434     return false;
1435 
1436   bool Imm;
1437   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1438     return false;
1439 
1440   return !Imm && isa<ConstantSDNode>(Offset);
1441 }
1442 
1443 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1444                                         SDValue &Offset) const {
1445   bool Imm;
1446   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1447          !isa<ConstantSDNode>(Offset);
1448 }
1449 
1450 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1451                                              SDValue &Offset) const {
1452   bool Imm;
1453   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1454 }
1455 
1456 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1457                                                SDValue &Offset) const {
1458   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1459     return false;
1460 
1461   bool Imm;
1462   if (!SelectSMRDOffset(Addr, Offset, Imm))
1463     return false;
1464 
1465   return !Imm && isa<ConstantSDNode>(Offset);
1466 }
1467 
// Split a MOVREL index into a variable base plus a constant i32 offset.
// Fails on a purely constant index; otherwise succeeds (zero offset in the
// default case).
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}
1491 
1492 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1493                                      SDValue Val, uint32_t Offset,
1494                                      uint32_t Width) {
1495   // Transformation function, pack the offset and width of a BFE into
1496   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1497   // source, bits [5:0] contain the offset and bits [22:16] the width.
1498   uint32_t PackedVal = Offset | (Width << 16);
1499   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1500 
1501   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1502 }
1503 
// Match a shift-left followed by an arithmetic/logical shift-right into a
// bitfield extract.  Falls back to normal pattern selection when the shift
// amounts are not constants in the required range.
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA yields the signed extract; SRL the unsigned one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  // No match: use ordinary tablegen selection.
  SelectCode(N);
}
1528 
// Try to fold AND/SRL/SRA/sign_extend_inreg combinations into S_BFE
// bitfield extracts; falls back to SelectCode when no pattern applies.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of the extract is the number of set mask bits.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl then srl) is handled by the shift-pair matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl then sra) is handled by the shift-pair matcher (signed form).
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sign-extension type gives the extract width.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched: use ordinary tablegen selection.
  SelectCode(N);
}
1604 
1605 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1606   assert(N->getOpcode() == ISD::BRCOND);
1607   if (!N->hasOneUse())
1608     return false;
1609 
1610   SDValue Cond = N->getOperand(1);
1611   if (Cond.getOpcode() == ISD::CopyToReg)
1612     Cond = Cond.getOperand(2);
1613 
1614   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1615     return false;
1616 
1617   MVT VT = Cond.getOperand(0).getSimpleValueType();
1618   if (VT == MVT::i32)
1619     return true;
1620 
1621   if (VT == MVT::i64) {
1622     auto ST = static_cast<const SISubtarget *>(Subtarget);
1623 
1624     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1625     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1626   }
1627 
1628   return false;
1629 }
1630 
1631 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1632   SDValue Cond = N->getOperand(1);
1633 
1634   if (Cond.isUndef()) {
1635     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
1636                          N->getOperand(2), N->getOperand(0));
1637     return;
1638   }
1639 
1640   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
1641   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
1642   unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
1643   SDLoc SL(N);
1644 
1645   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
1646   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
1647                        N->getOperand(2), // Basic Block
1648                        VCC.getValue(0));
1649 }
1650 
// Try to select an f32 fmad as v_mad_mix_f32 when at least one operand is
// really an f16 value extended to f32; otherwise defer to the generated
// matcher.
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  // v_mad_mix_f32 only produces f32 and is gated on the subtarget feature.
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  // NOTE: each call may rewrite its SrcN in place (stripping fneg/fabs and
  // fp_extend) and fills in the corresponding modifier bits, so the operands
  // below must not be re-read from N after this point.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    // Operand order: (mods0, src0, mods1, src1, mods2, src2, clamp,
    // op_sel, op_sel_hi).
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    // No f16 conversion found; a plain fmad is better.
    SelectCode(N);
  }
}
1690 
1691 // This is here because there isn't a way to use the generated sub0_sub1 as the
1692 // subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // Flat accesses are selected through the normal patterns.
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  // Prefer the ADDR64 MUBUF form when the subtarget has it; fall back to
  // the offset-only form, and finally to the generated matcher.
  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      // Operand 2 of the memory node is the compare/swap value pair.
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched; let the normal patterns handle it.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand so the machine node keeps alias info.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the whole data register pair; the original node
  // only produces the "old value", so extract the low subregister. This is
  // done here because tablegen cannot use the generated sub0_sub1 as an
  // EXTRACT_SUBREG index (see comment above the function).
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1756 
1757 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
1758                                             unsigned &Mods) const {
1759   Mods = 0;
1760   Src = In;
1761 
1762   if (Src.getOpcode() == ISD::FNEG) {
1763     Mods |= SISrcMods::NEG;
1764     Src = Src.getOperand(0);
1765   }
1766 
1767   if (Src.getOpcode() == ISD::FABS) {
1768     Mods |= SISrcMods::ABS;
1769     Src = Src.getOperand(0);
1770   }
1771 
1772   return true;
1773 }
1774 
1775 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1776                                         SDValue &SrcMods) const {
1777   unsigned Mods;
1778   if (SelectVOP3ModsImpl(In, Src, Mods)) {
1779     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1780     return true;
1781   }
1782 
1783   return false;
1784 }
1785 
1786 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
1787                                              SDValue &SrcMods) const {
1788   SelectVOP3Mods(In, Src, SrcMods);
1789   return isNoNanSrc(Src);
1790 }
1791 
1792 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
1793   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
1794     return false;
1795 
1796   Src = In;
1797   return true;
1798 }
1799 
1800 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1801                                          SDValue &SrcMods, SDValue &Clamp,
1802                                          SDValue &Omod) const {
1803   SDLoc DL(In);
1804   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1805   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1806 
1807   return SelectVOP3Mods(In, Src, SrcMods);
1808 }
1809 
1810 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1811                                                    SDValue &SrcMods,
1812                                                    SDValue &Clamp,
1813                                                    SDValue &Omod) const {
1814   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1815   return SelectVOP3Mods(In, Src, SrcMods);
1816 }
1817 
1818 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
1819                                          SDValue &Clamp, SDValue &Omod) const {
1820   Src = In;
1821 
1822   SDLoc DL(In);
1823   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1824   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
1825 
1826   return true;
1827 }
1828 
1829 static SDValue stripBitcast(SDValue Val) {
1830   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
1831 }
1832 
1833 // Figure out if this is really an extract of the high 16-bits of a dword.
1834 static bool isExtractHiElt(SDValue In, SDValue &Out) {
1835   In = stripBitcast(In);
1836   if (In.getOpcode() != ISD::TRUNCATE)
1837     return false;
1838 
1839   SDValue Srl = In.getOperand(0);
1840   if (Srl.getOpcode() == ISD::SRL) {
1841     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
1842       if (ShiftAmt->getZExtValue() == 16) {
1843         Out = stripBitcast(Srl.getOperand(0));
1844         return true;
1845       }
1846     }
1847   }
1848 
1849   return false;
1850 }
1851 
1852 // Look through operations that obscure just looking at the low 16-bits of the
1853 // same register.
1854 static SDValue stripExtractLoElt(SDValue In) {
1855   if (In.getOpcode() == ISD::TRUNCATE) {
1856     SDValue Src = In.getOperand(0);
1857     if (Src.getValueType().getSizeInBits() == 32)
1858       return stripBitcast(Src);
1859   }
1860 
1861   return In;
1862 }
1863 
// Match packed (VOP3P) source modifiers: per-half negation plus op_sel bits
// that select which 16-bit half of a 32-bit register each lane reads.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    // A whole-vector fneg toggles the neg bit for both halves.
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the modifiers so far so the per-element analysis below can be
    // discarded if it doesn't pan out.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      // XOR so a per-element fneg cancels an outer whole-vector fneg.
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // An element that is really the high half of a dword is expressed with
    // the corresponding op_sel bit.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    // Elements differ: give up on the per-element analysis and keep the
    // original build_vector as the packed source.
    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}
1917 
1918 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
1919                                           SDValue &SrcMods,
1920                                           SDValue &Clamp) const {
1921   SDLoc SL(In);
1922 
1923   // FIXME: Handle clamp and op_sel
1924   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1925 
1926   return SelectVOP3PMods(In, Src, SrcMods);
1927 }
1928 
1929 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
1930                                          SDValue &SrcMods) const {
1931   Src = In;
1932   // FIXME: Handle op_sel
1933   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1934   return true;
1935 }
1936 
1937 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
1938                                           SDValue &SrcMods,
1939                                           SDValue &Clamp) const {
1940   SDLoc SL(In);
1941 
1942   // FIXME: Handle clamp
1943   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1944 
1945   return SelectVOP3OpSel(In, Src, SrcMods);
1946 }
1947 
1948 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
1949                                              SDValue &SrcMods) const {
1950   // FIXME: Handle op_sel
1951   return SelectVOP3Mods(In, Src, SrcMods);
1952 }
1953 
1954 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
1955                                               SDValue &SrcMods,
1956                                               SDValue &Clamp) const {
1957   SDLoc SL(In);
1958 
1959   // FIXME: Handle clamp
1960   Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
1961 
1962   return SelectVOP3OpSelMods(In, Src, SrcMods);
1963 }
1964 
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion from f16 is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // First strip any fneg/fabs wrapping the f32 value.
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    // The operand is really an f16 extended to f32.
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      // A second layer of fneg/fabs may be hiding under the fp_extend.
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // Two fnegs cancel, hence XOR rather than OR.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    // An f16 conversion was folded.
    return true;
  }

  // No conversion used; Src/Mods still hold the plain f32 result.
  return false;
}
2007 
2008 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
2009                                                SDValue &SrcMods) const {
2010   unsigned Mods = 0;
2011   SelectVOP3PMadMixModsImpl(In, Src, Mods);
2012   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2013   return true;
2014 }
2015 
// Run target-specific post-selection folding over all machine nodes until a
// fixed point is reached.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        // A fold produced a replacement node; rewire users and scan the
        // whole DAG again, since the change may enable further folds.
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Remove nodes orphaned by the replacements before the next iteration.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
2037 
// R600 top-level node selection: handle BUILD_VECTOR-style nodes here to
// pick the right register class; everything else goes to the generated
// matcher.
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      // Vertical vectors need the dedicated vertical register class.
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}
2074 
2075 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
2076                                           SDValue &Offset) {
2077   ConstantSDNode *C;
2078   SDLoc DL(Addr);
2079 
2080   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
2081     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2082     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2083   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
2084              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
2085     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
2086     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2087   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
2088             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
2089     Base = Addr.getOperand(0);
2090     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
2091   } else {
2092     Base = Addr;
2093     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2094   }
2095 
2096   return true;
2097 }
2098 
2099 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
2100                                           SDValue &Offset) {
2101   ConstantSDNode *IMMOffset;
2102 
2103   if (Addr.getOpcode() == ISD::ADD
2104       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
2105       && isInt<16>(IMMOffset->getZExtValue())) {
2106 
2107       Base = Addr.getOperand(0);
2108       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2109                                          MVT::i32);
2110       return true;
2111   // If the pointer address is constant, we can move it to the offset field.
2112   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
2113              && isInt<16>(IMMOffset->getZExtValue())) {
2114     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
2115                                   SDLoc(CurDAG->getEntryNode()),
2116                                   AMDGPU::ZERO, MVT::i32);
2117     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
2118                                        MVT::i32);
2119     return true;
2120   }
2121 
2122   // Default case, no offset
2123   Base = Addr;
2124   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2125   return true;
2126 }
2127