1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 static bool isCBranchSCC(const SDNode *N) {
42   assert(N->getOpcode() == ISD::BRCOND);
43   if (!N->hasOneUse())
44     return false;
45 
46   SDValue Cond = N->getOperand(1);
47   if (Cond.getOpcode() == ISD::CopyToReg)
48     Cond = Cond.getOperand(2);
49   return Cond.getOpcode() == ISD::SETCC &&
50          Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
51 }
52 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Refreshed per-function by runOnMachineFunction.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  // Caches the function's subtarget, then delegates to the base class.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Main entry point: select one SDNode into machine instructions.
  void Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // Whether \p N is a constant encodable as an inline immediate operand.
  bool isInlineImmediate(const SDNode *N) const;
  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Predicates used by the generated matcher.
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  // Glue a CopyToReg of m0 onto \p N when needed for LDS access.
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  // Complex-pattern selectors: global-value addressing.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // Complex-pattern selectors: DS (local memory) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // Complex-pattern selectors: MUBUF (buffer memory) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // Complex-pattern selector: FLAT addressing.
  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  // Complex-pattern selectors: SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
  // Complex-pattern selectors: VOP3 source modifiers (neg/abs/clamp/omod).
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom node selection for cases tablegen cannot express.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
159 }  // end anonymous namespace
160 
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
163 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
164   return new AMDGPUDAGToDAGISel(TM);
165 }
166 
// Construct the selector. The Subtarget member is left unset here; it is
// filled in by runOnMachineFunction once the current function is known.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
169 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before the base class starts
  // selection; the Select* helpers consult it for generation checks.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
174 
// Nothing to release; the destructor is intentionally empty.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
177 
178 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
179   const SIInstrInfo *TII
180     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
181 
182   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
183     return TII->isInlineConstant(C->getAPIntValue());
184 
185   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
186     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
187 
188   return false;
189 }
190 
191 /// \brief Determine the register class for \p OpNo
192 /// \returns The register class of the virtual register that will be used for
193 /// the given operand number \OpNo or NULL if the register class cannot be
194 /// determined.
195 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
196                                                           unsigned OpNo) const {
197   if (!N->isMachineOpcode())
198     return nullptr;
199 
200   switch (N->getMachineOpcode()) {
201   default: {
202     const MCInstrDesc &Desc =
203         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
204     unsigned OpIdx = Desc.getNumDefs() + OpNo;
205     if (OpIdx >= Desc.getNumOperands())
206       return nullptr;
207     int RegClass = Desc.OpInfo[OpIdx].RegClass;
208     if (RegClass == -1)
209       return nullptr;
210 
211     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
212   }
213   case AMDGPU::REG_SEQUENCE: {
214     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
215     const TargetRegisterClass *SuperRC =
216         Subtarget->getRegisterInfo()->getRegClass(RCID);
217 
218     SDValue SubRegOp = N->getOperand(OpNo + 1);
219     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
220     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
221                                                               SubRegIdx);
222   }
223   }
224 }
225 
226 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
227   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
228       cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
229     return N;
230 
231   const SITargetLowering& Lowering =
232       *static_cast<const SITargetLowering*>(getTargetLowering());
233 
234   // Write max value to m0 before each load operation
235 
236   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
237                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
238 
239   SDValue Glue = M0.getValue(1);
240 
241   SmallVector <SDValue, 8> Ops;
242   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
243      Ops.push_back(N->getOperand(i));
244   }
245   Ops.push_back(Glue);
246   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
247 
248   return N;
249 }
250 
251 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
252   switch (NumVectorElts) {
253   case 1:
254     return AMDGPU::SReg_32RegClassID;
255   case 2:
256     return AMDGPU::SReg_64RegClassID;
257   case 4:
258     return AMDGPU::SReg_128RegClassID;
259   case 8:
260     return AMDGPU::SReg_256RegClassID;
261   case 16:
262     return AMDGPU::SReg_512RegClassID;
263   }
264 
265   llvm_unreachable("invalid vector size");
266 }
267 
// Top-level per-node selection hook. Handles the cases tablegen cannot
// express (multi-result nodes, 64-bit arithmetic splitting, m0 glue) and
// defers everything else to the generated matcher via SelectCode.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics on local memory need m0 initialized; glue the m0 copy in front
  // of the node before selecting it.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction to a REG_SEQUENCE over the element values.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A 1-element vector is just a copy into the right register class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine two scalars into a 64- or 128-bit register pair.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize a 64-bit non-inline constant as two S_MOV_B32 halves
    // recombined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores also need the m0 glue; selection itself is
    // still done by the generated matcher below.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be selectable as S_BFE bitfield extracts on SI+.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Fall through to the tablegen-generated matcher.
  SelectCode(N);
}
491 
492 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
493   if (!N->readMem())
494     return false;
495   if (CbId == -1)
496     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
497 
498   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
499 }
500 
501 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
502   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
503   const Instruction *Term = BB->getTerminator();
504   return Term->getMetadata("amdgpu.uniform") ||
505          Term->getMetadata("structurizecfg.uniform");
506 }
507 
// Human-readable pass name reported by the pass manager.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
511 
512 //===----------------------------------------------------------------------===//
513 // Complex Patterns
514 //===----------------------------------------------------------------------===//
515 
516 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
517                                                          SDValue& IntPtr) {
518   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
519     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
520                                        true);
521     return true;
522   }
523   return false;
524 }
525 
526 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
527     SDValue& BaseReg, SDValue &Offset) {
528   if (!isa<ConstantSDNode>(Addr)) {
529     BaseReg = Addr;
530     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
531     return true;
532   }
533   return false;
534 }
535 
536 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
537                                            SDValue &Offset) {
538   ConstantSDNode *IMMOffset;
539 
540   if (Addr.getOpcode() == ISD::ADD
541       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
542       && isInt<16>(IMMOffset->getZExtValue())) {
543 
544       Base = Addr.getOperand(0);
545       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
546                                          MVT::i32);
547       return true;
548   // If the pointer address is constant, we can move it to the offset field.
549   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
550              && isInt<16>(IMMOffset->getZExtValue())) {
551     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
552                                   SDLoc(CurDAG->getEntryNode()),
553                                   AMDGPU::ZERO, MVT::i32);
554     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
555                                        MVT::i32);
556     return true;
557   }
558 
559   // Default case, no offset
560   Base = Addr;
561   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
562   return true;
563 }
564 
565 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
566                                             SDValue &Offset) {
567   ConstantSDNode *C;
568   SDLoc DL(Addr);
569 
570   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
571     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
572     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
573   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
574             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
575     Base = Addr.getOperand(0);
576     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
577   } else {
578     Base = Addr;
579     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
580   }
581 
582   return true;
583 }
584 
585 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
586   SDLoc DL(N);
587   SDValue LHS = N->getOperand(0);
588   SDValue RHS = N->getOperand(1);
589 
590   bool IsAdd = (N->getOpcode() == ISD::ADD);
591 
592   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
593   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
594 
595   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
596                                        DL, MVT::i32, LHS, Sub0);
597   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
598                                        DL, MVT::i32, LHS, Sub1);
599 
600   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
601                                        DL, MVT::i32, RHS, Sub0);
602   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
603                                        DL, MVT::i32, RHS, Sub1);
604 
605   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
606   SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
607 
608   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
609   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
610 
611   SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
612   SDValue Carry(AddLo, 1);
613   SDNode *AddHi
614     = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
615                              SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
616 
617   SDValue Args[5] = {
618     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
619     SDValue(AddLo,0),
620     Sub0,
621     SDValue(AddHi,0),
622     Sub1,
623   };
624   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
625 }
626 
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Each call fills a (src, src_modifiers) pair; the modifiers slot precedes
  // the source slot, and the first call additionally fills clamp (Ops[6])
  // and omod (Ops[7]).
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
647 
648 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
649                                          unsigned OffsetBits) const {
650   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
651       (OffsetBits == 8 && !isUInt<8>(Offset)))
652     return false;
653 
654   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
655       Subtarget->unsafeDSOffsetFoldingEnabled())
656     return true;
657 
658   // On Southern Islands instruction with a negative base value and an offset
659   // don't seem to work.
660   return CurDAG->SignBitIsZero(Base);
661 }
662 
// Match a DS address as a base register plus a 16-bit unsigned offset.
// Always succeeds; falls back to (Addr, 0) when nothing can be folded.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real negation as a machine node for the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
723 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS read2/write2 address: one base register plus two adjacent
// 8-bit dword offsets. Always succeeds; falls back to (Addr, 0, 1).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are in dwords (bytes / 4); the second slot is the next dword.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the real negation as a machine node for the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: share a zero base register and fold the whole
    // address into the offsets (must be dword aligned).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
794 
795 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
796   return isUInt<12>(Imm->getZExtValue());
797 }
798 
/// Decompose \p Addr into the full set of MUBUF operands: resource pointer,
/// vaddr, soffset, immediate offset, and the offen/idxen/addr64 mode flags
/// plus glc/slc/tfe cache-control bits. Returns false only when the subtarget
/// prefers flat instructions for global accesses; otherwise some addressing
/// form is always produced.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already have been set by the caller; only default the ones
  // that are still empty.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Start from the simplest mode; the matches below override Addr64, VAddr
  // and SOffset as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // Constant fits the 12-bit immediate offset field.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
874 
875 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
876                                            SDValue &VAddr, SDValue &SOffset,
877                                            SDValue &Offset, SDValue &GLC,
878                                            SDValue &SLC, SDValue &TFE) const {
879   SDValue Ptr, Offen, Idxen, Addr64;
880 
881   // addr64 bit was removed for volcanic islands.
882   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
883     return false;
884 
885   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
886               GLC, SLC, TFE))
887     return false;
888 
889   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
890   if (C->getSExtValue()) {
891     SDLoc DL(Addr);
892 
893     const SITargetLowering& Lowering =
894       *static_cast<const SITargetLowering*>(getTargetLowering());
895 
896     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
897     return true;
898   }
899 
900   return false;
901 }
902 
903 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
904                                            SDValue &VAddr, SDValue &SOffset,
905                                            SDValue &Offset,
906                                            SDValue &SLC) const {
907   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
908   SDValue GLC, TFE;
909 
910   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
911 }
912 
913 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
914                                             SDValue &VAddr, SDValue &SOffset,
915                                             SDValue &ImmOffset) const {
916 
917   SDLoc DL(Addr);
918   MachineFunction &MF = CurDAG->getMachineFunction();
919   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
920 
921   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
922   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
923 
924   // (add n0, c1)
925   if (CurDAG->isBaseWithConstantOffset(Addr)) {
926     SDValue N0 = Addr.getOperand(0);
927     SDValue N1 = Addr.getOperand(1);
928 
929     // Offsets in vaddr must be positive.
930     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
931     if (isLegalMUBUFImmOffset(C1)) {
932       VAddr = N0;
933       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
934       return true;
935     }
936   }
937 
938   // (node)
939   VAddr = Addr;
940   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
941   return true;
942 }
943 
/// Select the plain-offset MUBUF form (no offen/idxen/addr64), building a
/// full rsrc descriptor around the pointer returned by SelectMUBUF.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only accept the pure offset form: all three mode flags must be clear.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Descriptor word: default data format in the high bits, with an
    // all-ones (maximum) value in the size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
971 
972 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
973                                            SDValue &Soffset, SDValue &Offset
974                                            ) const {
975   SDValue GLC, SLC, TFE;
976 
977   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
978 }
979 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
980                                            SDValue &Soffset, SDValue &Offset,
981                                            SDValue &SLC) const {
982   SDValue GLC, TFE;
983 
984   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
985 }
986 
/// Split a constant buffer offset into a 12-bit immediate part (ImmOffset)
/// and an soffset part (SOffset) carrying whatever does not fit. Fails on
/// SI/CI when any soffset would be required, due to a hardware bug.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  // Values below 4096 fit entirely in the immediate field (Overflow stays 0).
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Overflow values up to 64 are inline constants; anything larger must be
  // materialized with an s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1030 
1031 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1032                                                     SDValue &SOffset,
1033                                                     SDValue &ImmOffset) const {
1034   SDLoc DL(Offset);
1035 
1036   if (!isa<ConstantSDNode>(Offset))
1037     return false;
1038 
1039   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1040 }
1041 
/// Select a buffer-intrinsic offset into soffset + immediate + voffset.
/// Always succeeds except for constant offsets that the plain (no-voffset)
/// form can already handle.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    // On VI+, or when SelectMUBUFConstant succeeds (no workaround needed),
    // reject so the no-voffset pattern handles the constant instead.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  // (add n0, c1): fold a non-negative constant addend into soffset/imm and
  // keep the variable part in voffset.
  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Default: everything in voffset, zero soffset and immediate.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1075 
1076 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1077                                     SDValue &VAddr,
1078                                     SDValue &SLC,
1079                                     SDValue &TFE) const {
1080   VAddr = Addr;
1081   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1082   return true;
1083 }
1084 
1085 ///
1086 /// \param EncodedOffset This is the immediate value that will be encoded
1087 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1088 ///        will be in units of dwords and on VI+ it will be units of bytes.
1089 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1090                                  int64_t EncodedOffset) {
1091   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1092      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1093 }
1094 
1095 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1096                                           SDValue &Offset, bool &Imm) const {
1097 
1098   // FIXME: Handle non-constant offsets.
1099   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1100   if (!C)
1101     return false;
1102 
1103   SDLoc SL(ByteOffsetNode);
1104   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1105   int64_t ByteOffset = C->getSExtValue();
1106   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1107       ByteOffset >> 2 : ByteOffset;
1108 
1109   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1110     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1111     Imm = true;
1112     return true;
1113   }
1114 
1115   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1116     return false;
1117 
1118   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1119     // 32-bit Immediates are supported on Sea Islands.
1120     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1121   } else {
1122     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1123     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1124                                             C32Bit), 0);
1125   }
1126   Imm = false;
1127   return true;
1128 }
1129 
1130 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1131                                      SDValue &Offset, bool &Imm) const {
1132 
1133   SDLoc SL(Addr);
1134   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1135     SDValue N0 = Addr.getOperand(0);
1136     SDValue N1 = Addr.getOperand(1);
1137 
1138     if (SelectSMRDOffset(N1, Offset, Imm)) {
1139       SBase = N0;
1140       return true;
1141     }
1142   }
1143   SBase = Addr;
1144   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1145   Imm = true;
1146   return true;
1147 }
1148 
1149 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1150                                        SDValue &Offset) const {
1151   bool Imm;
1152   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1153 }
1154 
1155 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1156                                          SDValue &Offset) const {
1157 
1158   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1159     return false;
1160 
1161   bool Imm;
1162   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1163     return false;
1164 
1165   return !Imm && isa<ConstantSDNode>(Offset);
1166 }
1167 
1168 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1169                                         SDValue &Offset) const {
1170   bool Imm;
1171   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1172          !isa<ConstantSDNode>(Offset);
1173 }
1174 
1175 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1176                                              SDValue &Offset) const {
1177   bool Imm;
1178   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1179 }
1180 
1181 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1182                                                SDValue &Offset) const {
1183   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1184     return false;
1185 
1186   bool Imm;
1187   if (!SelectSMRDOffset(Addr, Offset, Imm))
1188     return false;
1189 
1190   return !Imm && isa<ConstantSDNode>(Offset);
1191 }
1192 
1193 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1194                                               SDValue &Offset) const {
1195   bool Imm;
1196   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1197          !isa<ConstantSDNode>(Offset);
1198 }
1199 
1200 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1201                                             SDValue &Base,
1202                                             SDValue &Offset) const {
1203   SDLoc DL(Index);
1204 
1205   if (CurDAG->isBaseWithConstantOffset(Index)) {
1206     SDValue N0 = Index.getOperand(0);
1207     SDValue N1 = Index.getOperand(1);
1208     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1209 
1210     // (add n0, c0)
1211     Base = N0;
1212     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1213     return true;
1214   }
1215 
1216   if (isa<ConstantSDNode>(Index))
1217     return false;
1218 
1219   Base = Index;
1220   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1221   return true;
1222 }
1223 
1224 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1225                                      SDValue Val, uint32_t Offset,
1226                                      uint32_t Width) {
1227   // Transformation function, pack the offset and width of a BFE into
1228   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1229   // source, bits [5:0] contain the offset and bits [22:16] the width.
1230   uint32_t PackedVal = Offset | (Width << 16);
1231   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1232 
1233   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1234 }
1235 
/// Fold a shift-left followed by an arithmetic/logical shift-right into a
/// single bitfield extract. Falls back to the generated matcher when the
/// shift amounts are not constants or the predicate does not hold.
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA requires the signed (I32) extract; SRL the unsigned (U32) one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}
1260 
/// Try to select AND/SRL/SRA/sign_extend_inreg patterns as a single
/// S_BFE_{I,U}32 bitfield extract; otherwise defer to the generated matcher.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // A contiguous low mask: its popcount is the extract width.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask down so the isMask test sees the post-shift bits.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl, srl) pairs are handled by the shared shift-pair helper.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl, sra) pairs select the signed extract via the same helper.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sext type width is the extract width; the shift amount the offset.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched; let the generated matcher handle the node.
  SelectCode(N);
}
1336 
/// Select a BRCOND node. Scalar (SCC) conditions go through tablegen; vector
/// (VCC) conditions are masked with EXEC and lowered to S_CBRANCH_VCCNZ.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR.  This means that the value '1' is always
  // written to the corresponding bit for results that are masked.  In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  // Copy the masked condition into VCC, chaining off the brcond's chain
  // operand so the copy is ordered before the branch.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0),  // Chain
                       VCC.getValue(1)); // Glue
  return;
}
1368 
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // Flat-address cmpswap has its own patterns; defer to tablegen.
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 buffer form when the subtarget supports it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the plain-offset buffer form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched; use the generated matcher.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the original memory operand to the new machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the swap pair; extract the old-value half the
  // original node's users expect (sub0 for i32, sub0_sub1 for i64).
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1434 
1435 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1436                                         SDValue &SrcMods) const {
1437 
1438   unsigned Mods = 0;
1439 
1440   Src = In;
1441 
1442   if (Src.getOpcode() == ISD::FNEG) {
1443     Mods |= SISrcMods::NEG;
1444     Src = Src.getOperand(0);
1445   }
1446 
1447   if (Src.getOpcode() == ISD::FABS) {
1448     Mods |= SISrcMods::ABS;
1449     Src = Src.getOperand(0);
1450   }
1451 
1452   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1453 
1454   return true;
1455 }
1456 
1457 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1458                                          SDValue &SrcMods) const {
1459   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1460   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1461 }
1462 
1463 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1464                                          SDValue &SrcMods, SDValue &Clamp,
1465                                          SDValue &Omod) const {
1466   SDLoc DL(In);
1467   // FIXME: Handle Clamp and Omod
1468   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1469   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1470 
1471   return SelectVOP3Mods(In, Src, SrcMods);
1472 }
1473 
1474 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1475                                            SDValue &SrcMods, SDValue &Clamp,
1476                                            SDValue &Omod) const {
1477   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1478 
1479   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1480                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1481                 cast<ConstantSDNode>(Omod)->isNullValue();
1482 }
1483 
1484 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1485                                               SDValue &SrcMods,
1486                                               SDValue &Omod) const {
1487   // FIXME: Handle Omod
1488   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1489 
1490   return SelectVOP3Mods(In, Src, SrcMods);
1491 }
1492 
1493 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1494                                                    SDValue &SrcMods,
1495                                                    SDValue &Clamp,
1496                                                    SDValue &Omod) const {
1497   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1498   return SelectVOP3Mods(In, Src, SrcMods);
1499 }
1500 
/// Rewrite non-pointer frame-index operands of memory nodes into explicit
/// v_mov copies, so that after selection each instruction carries at most one
/// raw frame-index operand (the pointer).
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      It = EffectiveFI->use_begin();
    }

    // NOTE(review): the loop variable below shadows the outer `It`; the
    // outer value is not needed past this point, but worth confirming.
    // Advance before mutating, since UpdateNodeOperands below can
    // invalidate the current use.
    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        // Leave the pointer operand alone; only non-pointer uses (e.g. a
        // stored value) are rewritten.
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          // Replace the frame-index operand with a v_mov of it.
          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
      }
    }
  }
}
1556 
/// Run target peephole folds over the selected machine nodes, repeating
/// until a full pass makes no further changes.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        // A fold produced a replacement; rewire users and loop again, since
        // the replacement may enable further folds.
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes orphaned by the replacements before the next pass.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1578