1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/CodeGen/FunctionLoweringInfo.h"
22 #include "llvm/CodeGen/PseudoSourceValue.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SelectionDAGISel.h"
25 #include "llvm/IR/DiagnosticInfo.h"
26 
27 using namespace llvm;
28 
29 namespace llvm {
30 class R600InstrInfo;
31 }
32 
33 //===----------------------------------------------------------------------===//
34 // Instruction Selector Implementation
35 //===----------------------------------------------------------------------===//
36 
37 namespace {
38 
39 static bool isCBranchSCC(const SDNode *N) {
40   assert(N->getOpcode() == ISD::BRCOND);
41   if (!N->hasOneUse())
42     return false;
43 
44   SDValue Cond = N->getOperand(1);
45   if (Cond.getOpcode() == ISD::CopyToReg)
46     Cond = Cond.getOperand(2);
47   return Cond.getOpcode() == ISD::SETCC &&
48          Cond.getOperand(0).getValueType() == MVT::i32 &&
49 	 Cond.hasOneUse();
50 }
51 
52 /// AMDGPU specific code to select AMDGPU machine instructions for
53 /// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Initialized per-function in runOnMachineFunction().
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;

  // R600-specific operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory nodes.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // True if the branch terminator carries "amdgpu.uniform" metadata.
  bool isUniformBr(const SDNode *N) const;

  // Glue a CopyToReg of m0 onto LDS memory operations (SI+ only).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing-mode matchers.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing-mode matchers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;

  // SMRD (scalar memory read) addressing-mode matchers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);

  // VOP3 source-modifier (neg/abs/clamp/omod) matchers.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom selection for nodes tablegen cannot match directly.
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
166 }  // end anonymous namespace
167 
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  // Factory entry point used when the AMDGPU target sets up its pass pipeline.
  return new AMDGPUDAGToDAGISel(TM);
}
173 
// Note: Subtarget is not initialized here; it is set per-function in
// runOnMachineFunction().
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
176 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before delegating to the common
  // SelectionDAG-based selection driver.
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}
181 
// Out-of-line destructor; nothing to release explicitly.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
184 
185 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
186   const SITargetLowering *TL
187       = static_cast<const SITargetLowering *>(getTargetLowering());
188   return TL->analyzeImmediate(N) == 0;
189 }
190 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  // Only machine (already-selected) nodes have operand register classes.
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Ordinary machine instruction: look the operand up in its MCInstrDesc.
    // OpNo is relative to the uses, so skip over the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE: operand 0 is the register-class ID; subsequent operands
    // come in (value, subregister-index) pairs, so the subreg index for
    // operand OpNo is at OpNo + 1.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
225 
226 bool AMDGPUDAGToDAGISel::SelectADDRParam(
227   SDValue Addr, SDValue& R1, SDValue& R2) {
228 
229   if (Addr.getOpcode() == ISD::FrameIndex) {
230     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
231       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
232       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
233     } else {
234       R1 = Addr;
235       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
236     }
237   } else if (Addr.getOpcode() == ISD::ADD) {
238     R1 = Addr.getOperand(0);
239     R2 = Addr.getOperand(1);
240   } else {
241     R1 = Addr;
242     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
243   }
244   return true;
245 }
246 
247 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
248   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
249       Addr.getOpcode() == ISD::TargetGlobalAddress) {
250     return false;
251   }
252   return SelectADDRParam(Addr, R1, R2);
253 }
254 
255 
256 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
257   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
258       Addr.getOpcode() == ISD::TargetGlobalAddress) {
259     return false;
260   }
261 
262   if (Addr.getOpcode() == ISD::FrameIndex) {
263     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
264       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
265       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
266     } else {
267       R1 = Addr;
268       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
269     }
270   } else if (Addr.getOpcode() == ISD::ADD) {
271     R1 = Addr.getOperand(0);
272     R2 = Addr.getOperand(1);
273   } else {
274     R1 = Addr;
275     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
276   }
277   return true;
278 }
279 
280 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
281   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
282       !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
283                  AMDGPUAS::LOCAL_ADDRESS))
284     return N;
285 
286   const SITargetLowering& Lowering =
287       *static_cast<const SITargetLowering*>(getTargetLowering());
288 
289   // Write max value to m0 before each load operation
290 
291   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
292                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
293 
294   SDValue Glue = M0.getValue(1);
295 
296   SmallVector <SDValue, 8> Ops;
297   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
298      Ops.push_back(N->getOperand(i));
299   }
300   Ops.push_back(Glue);
301   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
302 
303   return N;
304 }
305 
306 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
307   switch (NumVectorElts) {
308   case 1:
309     return AMDGPU::SReg_32RegClassID;
310   case 2:
311     return AMDGPU::SReg_64RegClassID;
312   case 4:
313     return AMDGPU::SReg_128RegClassID;
314   case 8:
315     return AMDGPU::SReg_256RegClassID;
316   case 16:
317     return AMDGPU::SReg_512RegClassID;
318   }
319 
320   llvm_unreachable("invalid vector size");
321 }
322 
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics may access LDS and therefore need m0 initialized first.
  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector build operations to a REG_SEQUENCE over the appropriate
    // register class (SGPR classes on SI+, R600 classes otherwise).
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A single-element "vector" is just a copy into the register class.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Combine two halves into a 64- or 128-bit REG_SEQUENCE (SI+ only).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit constants that are not inline immediates as two
    // S_MOV_B32s combined with a REG_SEQUENCE (SI+ only).
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Glue an m0 initialization onto LDS loads/stores, then fall through to
    // the generated matcher.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    // Multi-output instruction; cannot be matched by tablegen.
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    // Let the SI lowering fix up operands of this target-independent node.
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // Try to turn 32-bit shift/mask combinations into S_BFE (SI+ only).
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  return SelectCode(N);
}
534 
535 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
536   assert(AS != 0 && "Use checkPrivateAddress instead.");
537   if (!Ptr)
538     return false;
539 
540   return Ptr->getType()->getPointerAddressSpace() == AS;
541 }
542 
543 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
544   if (Op->getPseudoValue())
545     return true;
546 
547   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
548     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
549 
550   return false;
551 }
552 
553 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
554   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
555 }
556 
557 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
558   const Value *MemVal = N->getMemOperand()->getValue();
559   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
560           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
561           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
562 }
563 
564 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
565   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
566 }
567 
568 bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
569   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
570 }
571 
572 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
573   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
574 }
575 
576 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
577   const Value *MemVal = N->getMemOperand()->getValue();
578   if (CbId == -1)
579     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
580 
581   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
582 }
583 
584 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
585   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
586     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
587         N->getMemoryVT().bitsLT(MVT::i32))
588       return true;
589 
590   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
591 }
592 
593 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
594   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
595 }
596 
597 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
598   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
599 }
600 
601 bool AMDGPUDAGToDAGISel::isFlatLoad(const  LoadSDNode *N) const {
602   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
603 }
604 
605 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
606   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
607 }
608 
609 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
610   MachineMemOperand *MMO = N->getMemOperand();
611   if (checkPrivateAddress(N->getMemOperand())) {
612     if (MMO) {
613       const PseudoSourceValue *PSV = MMO->getPseudoValue();
614       if (PSV && PSV->isConstantPool()) {
615         return true;
616       }
617     }
618   }
619   return false;
620 }
621 
622 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
623   if (checkPrivateAddress(N->getMemOperand())) {
624     // Check to make sure we are not a constant pool load or a constant load
625     // that is marked as a private load
626     if (isCPLoad(N) || isConstantLoad(N, -1)) {
627       return false;
628     }
629   }
630 
631   const Value *MemVal = N->getMemOperand()->getValue();
632   return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
633     !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
634     !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
635     !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
636     !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
637     !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
638     !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
639 }
640 
641 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
642   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
643   return BB->getTerminator()->getMetadata("amdgpu.uniform");
644 }
645 
// Human-readable pass name reported by the pass manager.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
649 
650 //===----------------------------------------------------------------------===//
651 // Complex Patterns
652 //===----------------------------------------------------------------------===//
653 
654 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
655                                                          SDValue& IntPtr) {
656   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
657     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
658                                        true);
659     return true;
660   }
661   return false;
662 }
663 
664 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
665     SDValue& BaseReg, SDValue &Offset) {
666   if (!isa<ConstantSDNode>(Addr)) {
667     BaseReg = Addr;
668     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
669     return true;
670   }
671   return false;
672 }
673 
674 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
675                                            SDValue &Offset) {
676   ConstantSDNode *IMMOffset;
677 
678   if (Addr.getOpcode() == ISD::ADD
679       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
680       && isInt<16>(IMMOffset->getZExtValue())) {
681 
682       Base = Addr.getOperand(0);
683       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
684                                          MVT::i32);
685       return true;
686   // If the pointer address is constant, we can move it to the offset field.
687   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
688              && isInt<16>(IMMOffset->getZExtValue())) {
689     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
690                                   SDLoc(CurDAG->getEntryNode()),
691                                   AMDGPU::ZERO, MVT::i32);
692     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
693                                        MVT::i32);
694     return true;
695   }
696 
697   // Default case, no offset
698   Base = Addr;
699   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
700   return true;
701 }
702 
703 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
704                                             SDValue &Offset) {
705   ConstantSDNode *C;
706   SDLoc DL(Addr);
707 
708   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
709     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
710     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
711   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
712             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
713     Base = Addr.getOperand(0);
714     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
715   } else {
716     Base = Addr;
717     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
718   }
719 
720   return true;
721 }
722 
723 SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
724   SDLoc DL(N);
725   SDValue LHS = N->getOperand(0);
726   SDValue RHS = N->getOperand(1);
727 
728   bool IsAdd = (N->getOpcode() == ISD::ADD);
729 
730   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
731   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
732 
733   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
734                                        DL, MVT::i32, LHS, Sub0);
735   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
736                                        DL, MVT::i32, LHS, Sub1);
737 
738   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
739                                        DL, MVT::i32, RHS, Sub0);
740   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
741                                        DL, MVT::i32, RHS, Sub1);
742 
743   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
744   SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
745 
746 
747   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
748   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
749 
750   SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
751   SDValue Carry(AddLo, 1);
752   SDNode *AddHi
753     = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
754                              SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
755 
756   SDValue Args[5] = {
757     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
758     SDValue(AddLo,0),
759     Sub0,
760     SDValue(AddHi,0),
761     Sub1,
762   };
763   return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
764 }
765 
766 // We need to handle this here because tablegen doesn't support matching
767 // instructions with multiple outputs.
// Select a V_DIV_SCALE_F32/F64 for an AMDGPUISD::DIV_SCALE node.  DIV_SCALE
// produces two results (the scaled value and the VCC flag), which tablegen
// patterns cannot express, so it is matched manually here.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  // Only the two float division widths are expected here.
  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // The VOP3 operand order is:
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Note the swapped index pairs: each Select* call writes (Src, Mods) but
  // the instruction wants modifiers *before* the source operand.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  // Second result type is i1 for the implicit VCC output.
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
786 
787 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
788                                          unsigned OffsetBits) const {
789   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
790       (OffsetBits == 8 && !isUInt<8>(Offset)))
791     return false;
792 
793   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
794       Subtarget->unsafeDSOffsetFoldingEnabled())
795     return true;
796 
797   // On Southern Islands instruction with a negative base value and an offset
798   // don't seem to work.
799   return CurDAG->SignBitIsZero(Base);
800 }
801 
// Match a DS addressing mode with a single base register and a single 16-bit
// immediate offset.  Always succeeds; the fallback uses the whole address as
// the base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real negation as a machine node; the ISD::SUB above is
          // left for dead-node cleanup.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
862 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS read2/write2 addressing mode: one base register plus two 8-bit
// dword-unit offsets (Offset1 == Offset0 + 1).  Always succeeds via the
// fallback at the bottom.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in units of 4-byte dwords.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    // Checking the larger offset also covers the smaller one.
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the real negation; the ISD::SUB above becomes dead.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // NOTE(review): this assumes the constant address is 4-byte aligned —
    // an unaligned constant would trip this assert.
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      // Use a zero base register so multiple accesses can share it.
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
933 
934 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
935   return isUInt<12>(Imm->getZExtValue());
936 }
937 
// Decompose \p Addr into the MUBUF operand set: resource pointer (Ptr),
// optional VGPR address (VAddr), scalar offset register (SOffset), immediate
// offset (Offset), plus the addressing-mode flags (Offen/Idxen/Addr64) and
// cache-control bits (GLC/SLC/TFE), which are all defaulted to 0 here.
// Returns false when the subtarget prefers flat instructions.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default every flag operand to 0; only Addr64 is overridden below.
  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // Offset does not fit in 32 bits: fall through to the generic cases,
    // which ignore the constant part.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1009 
1010 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1011                                            SDValue &VAddr, SDValue &SOffset,
1012                                            SDValue &Offset, SDValue &GLC,
1013                                            SDValue &SLC, SDValue &TFE) const {
1014   SDValue Ptr, Offen, Idxen, Addr64;
1015 
1016   // addr64 bit was removed for volcanic islands.
1017   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1018     return false;
1019 
1020   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1021               GLC, SLC, TFE))
1022     return false;
1023 
1024   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1025   if (C->getSExtValue()) {
1026     SDLoc DL(Addr);
1027 
1028     const SITargetLowering& Lowering =
1029       *static_cast<const SITargetLowering*>(getTargetLowering());
1030 
1031     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1032     return true;
1033   }
1034 
1035   return false;
1036 }
1037 
1038 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1039                                            SDValue &VAddr, SDValue &SOffset,
1040                                            SDValue &Offset,
1041                                            SDValue &SLC) const {
1042   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1043   SDValue GLC, TFE;
1044 
1045   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1046 }
1047 
1048 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1049                                             SDValue &VAddr, SDValue &SOffset,
1050                                             SDValue &ImmOffset) const {
1051 
1052   SDLoc DL(Addr);
1053   MachineFunction &MF = CurDAG->getMachineFunction();
1054   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1055 
1056   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1057   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1058 
1059   // (add n0, c1)
1060   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1061     SDValue N0 = Addr.getOperand(0);
1062     SDValue N1 = Addr.getOperand(1);
1063 
1064     // Offsets in vaddr must be positive.
1065     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1066     if (isLegalMUBUFImmOffset(C1) && CurDAG->SignBitIsZero(N0)) {
1067       VAddr = N0;
1068       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1069       return true;
1070     }
1071   }
1072 
1073   // (node)
1074   VAddr = Addr;
1075   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1076   return true;
1077 }
1078 
// Match the MUBUF "offset only" addressing mode (no offen/idxen/addr64) and
// build a full resource descriptor from the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only accept the plain-offset form; any of offen/idxen/addr64 means a
  // different pattern must handle it.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Rsrc word: default data format with an all-ones (maximum) size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
1106 
1107 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1108                                            SDValue &Soffset, SDValue &Offset,
1109                                            SDValue &GLC) const {
1110   SDValue SLC, TFE;
1111 
1112   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1113 }
1114 
1115 ///
1116 /// \param EncodedOffset This is the immediate value that will be encoded
1117 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1118 ///        will be in units of dwords and on VI+ it will be units of bytes.
1119 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1120                                  int64_t EncodedOffset) {
1121   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1122      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1123 }
1124 
1125 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1126                                           SDValue &Offset, bool &Imm) const {
1127 
1128   // FIXME: Handle non-constant offsets.
1129   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1130   if (!C)
1131     return false;
1132 
1133   SDLoc SL(ByteOffsetNode);
1134   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1135   int64_t ByteOffset = C->getSExtValue();
1136   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1137       ByteOffset >> 2 : ByteOffset;
1138 
1139   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1140     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1141     Imm = true;
1142     return true;
1143   }
1144 
1145   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1146     return false;
1147 
1148   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1149     // 32-bit Immediates are supported on Sea Islands.
1150     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1151   } else {
1152     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1153     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1154                                             C32Bit), 0);
1155   }
1156   Imm = false;
1157   return true;
1158 }
1159 
1160 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1161                                      SDValue &Offset, bool &Imm) const {
1162 
1163   SDLoc SL(Addr);
1164   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1165     SDValue N0 = Addr.getOperand(0);
1166     SDValue N1 = Addr.getOperand(1);
1167 
1168     if (SelectSMRDOffset(N1, Offset, Imm)) {
1169       SBase = N0;
1170       return true;
1171     }
1172   }
1173   SBase = Addr;
1174   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1175   Imm = true;
1176   return true;
1177 }
1178 
1179 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1180                                        SDValue &Offset) const {
1181   bool Imm;
1182   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1183 }
1184 
1185 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1186                                          SDValue &Offset) const {
1187 
1188   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1189     return false;
1190 
1191   bool Imm;
1192   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1193     return false;
1194 
1195   return !Imm && isa<ConstantSDNode>(Offset);
1196 }
1197 
1198 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1199                                         SDValue &Offset) const {
1200   bool Imm;
1201   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1202          !isa<ConstantSDNode>(Offset);
1203 }
1204 
1205 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1206                                              SDValue &Offset) const {
1207   bool Imm;
1208   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1209 }
1210 
1211 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1212                                                SDValue &Offset) const {
1213   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1214     return false;
1215 
1216   bool Imm;
1217   if (!SelectSMRDOffset(Addr, Offset, Imm))
1218     return false;
1219 
1220   return !Imm && isa<ConstantSDNode>(Offset);
1221 }
1222 
1223 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1224                                               SDValue &Offset) const {
1225   bool Imm;
1226   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1227          !isa<ConstantSDNode>(Offset);
1228 }
1229 
// FIXME: This is incorrect and only enough to be able to compile.
// Lower an addrspacecast to/from the flat address space by reinterpreting the
// pointer value: truncate (64->32), extend with a zero high half (32->64), or
// bitcast (64->64).  A diagnostic is emitted because real support is missing.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  // Report that this path is not actually implemented before proceeding with
  // the placeholder lowering below.
  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(
      *MF.getFunction(), "addrspacecast not implemented", DL.getDebugLoc());
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    // 64 -> 32: keep only the low half (sub0).
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
      TargetOpcode::EXTRACT_SUBREG,
      DL,
      DestVT,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    // 32 -> 64: build {Src, 0} so the high half is zero-extended.
    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  // Same width: a plain bitcast suffices.
  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
1292 
1293 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1294                                      uint32_t Offset, uint32_t Width) {
1295   // Transformation function, pack the offset and width of a BFE into
1296   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1297   // source, bits [5:0] contain the offset and bits [22:16] the width.
1298   uint32_t PackedVal = Offset | (Width << 16);
1299   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1300 
1301   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1302 }
1303 
1304 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1305   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1306   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1307   // Predicate: 0 < b <= c < 32
1308 
1309   const SDValue &Shl = N->getOperand(0);
1310   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1311   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1312 
1313   if (B && C) {
1314     uint32_t BVal = B->getZExtValue();
1315     uint32_t CVal = C->getZExtValue();
1316 
1317     if (0 < BVal && BVal <= CVal && CVal < 32) {
1318       bool Signed = N->getOpcode() == ISD::SRA;
1319       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1320 
1321       return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1322                       CVal - BVal, 32 - CVal);
1323     }
1324   }
1325   return SelectCode(N);
1326 }
1327 
// Try to select AND/SRL/SRA patterns as a single S_BFE bit-field extract.
// Falls back to the generic tablegen selector when no pattern matches.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of the extracted field == number of bits in the mask.
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask down first; only the post-shift bits matter.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      // (shl a, b) srl c -> signed/unsigned field extract.
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  // No BFE pattern matched; use the generated selector.
  return SelectCode(N);
}
1381 
// Select a BRCOND.  SCC-based conditions are left to tablegen; everything
// else is lowered to a VCC-based S_CBRANCH_VCCNZ with the condition masked
// by EXEC.
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR.  This means that the value '1' is always
  // written to the corresponding bit for results that are masked.  In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  // Copy the masked condition into VCC, threading the original chain
  // (operand 0) through the copy.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}
1411 
1412 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1413                                         SDValue &SrcMods) const {
1414 
1415   unsigned Mods = 0;
1416 
1417   Src = In;
1418 
1419   if (Src.getOpcode() == ISD::FNEG) {
1420     Mods |= SISrcMods::NEG;
1421     Src = Src.getOperand(0);
1422   }
1423 
1424   if (Src.getOpcode() == ISD::FABS) {
1425     Mods |= SISrcMods::ABS;
1426     Src = Src.getOperand(0);
1427   }
1428 
1429   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1430 
1431   return true;
1432 }
1433 
1434 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1435                                          SDValue &SrcMods) const {
1436   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1437   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1438 }
1439 
1440 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1441                                          SDValue &SrcMods, SDValue &Clamp,
1442                                          SDValue &Omod) const {
1443   SDLoc DL(In);
1444   // FIXME: Handle Clamp and Omod
1445   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1446   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1447 
1448   return SelectVOP3Mods(In, Src, SrcMods);
1449 }
1450 
1451 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1452                                            SDValue &SrcMods, SDValue &Clamp,
1453                                            SDValue &Omod) const {
1454   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1455 
1456   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1457                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1458                 cast<ConstantSDNode>(Omod)->isNullValue();
1459 }
1460 
1461 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1462                                               SDValue &SrcMods,
1463                                               SDValue &Omod) const {
1464   // FIXME: Handle Omod
1465   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1466 
1467   return SelectVOP3Mods(In, Src, SrcMods);
1468 }
1469 
1470 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1471                                                    SDValue &SrcMods,
1472                                                    SDValue &Clamp,
1473                                                    SDValue &Omod) const {
1474   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1475   return SelectVOP3Mods(In, Src, SrcMods);
1476 }
1477 
// Rewrite i64 loads/stores as v2i32 before selection so the tablegen
// patterns only need to handle the vector form.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  // Collect first, mutate after: creating nodes while iterating allnodes()
  // would invalidate the traversal.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      // Only plain (non-extending) i64 loads are rewritten.
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patters, we replace all i64 loads with v2i32
      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
      // legalizer assume that if i64 is legal, so doing this promotion early
      // can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    // New v2i32 load reuses the original chain, pointer and memory operand;
    // a bitcast restores the i64 value for existing users.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    // Rewire the chain result (value 1) and the data result (value 0).
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    // Stores are updated in place: only the stored value changes (to the
    // bitcast v2i32 form); chain, pointer and offset are kept.
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}
1536 
// After selection, repeatedly run target-specific peephole folds
// (PostISelFolding) over all machine nodes until a fixed point is reached.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      // Only machine nodes (already-selected instructions) are candidates.
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      // A different node back means the fold fired; restart the sweep so
      // newly exposed opportunities are picked up.
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes orphaned by the replacements before the next iteration.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1558