1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 static bool isCBranchSCC(const SDNode *N) {
42   assert(N->getOpcode() == ISD::BRCOND);
43   if (!N->hasOneUse())
44     return false;
45 
46   SDValue Cond = N->getOperand(1);
47   if (Cond.getOpcode() == ISD::CopyToReg)
48     Cond = Cond.getOperand(2);
49   return Cond.getOpcode() == ISD::SETCC &&
50          Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
51 }
52 
53 /// AMDGPU specific code to select AMDGPU machine instructions for
54 /// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Set in runOnMachineFunction for each function being compiled.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Main custom-selection entry point; anything not handled explicitly
  // falls through to the tablegen-generated matcher (SelectCode).
  void Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True if \p N is a constant that fits in a hardware inline-immediate
  // field (queries SIInstrInfo; SI+ subtargets only — see definition).
  bool isInlineImmediate(const SDNode *N) const;

  // R600 operand-folding helpers (definitions not in this chunk).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Predicates used by the generated matcher to classify memory accesses.
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  // Glues an initialization of m0 onto local-memory operations (see the
  // definition for details).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern address selectors. Each returns true on a successful
  // match and fills in its output SDValue operands.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local data share) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (untyped buffer) addressing. The overloads select subsets of the
  // full operand list for the various instruction forms.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing.
  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs), clamp, and omod operand selection.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom selection routines invoked from Select() for nodes tablegen
  // cannot match (multiple results, special operand packing, etc.).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
159 }  // end anonymous namespace
160 
161 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
162 // DAG, ready for instruction scheduling.
163 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
164   return new AMDGPUDAGToDAGISel(TM);
165 }
166 
// Construct the selector. The Subtarget pointer is not set here; it is
// filled in per-function by runOnMachineFunction.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
169 
// Cache the subtarget for the current function before delegating to the
// generic SelectionDAG instruction-selection driver.
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
174 
// Out-of-line definition of the (empty) virtual destructor.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
177 
178 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
179   const SIInstrInfo *TII
180     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
181 
182   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
183     return TII->isInlineConstant(C->getAPIntValue());
184 
185   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
186     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
187 
188   return false;
189 }
190 
191 /// \brief Determine the register class for \p OpNo
192 /// \returns The register class of the virtual register that will be used for
193 /// the given operand number \OpNo or NULL if the register class cannot be
194 /// determined.
195 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
196                                                           unsigned OpNo) const {
197   if (!N->isMachineOpcode())
198     return nullptr;
199 
200   switch (N->getMachineOpcode()) {
201   default: {
202     const MCInstrDesc &Desc =
203         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
204     unsigned OpIdx = Desc.getNumDefs() + OpNo;
205     if (OpIdx >= Desc.getNumOperands())
206       return nullptr;
207     int RegClass = Desc.OpInfo[OpIdx].RegClass;
208     if (RegClass == -1)
209       return nullptr;
210 
211     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
212   }
213   case AMDGPU::REG_SEQUENCE: {
214     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
215     const TargetRegisterClass *SuperRC =
216         Subtarget->getRegisterInfo()->getRegClass(RCID);
217 
218     SDValue SubRegOp = N->getOperand(OpNo + 1);
219     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
220     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
221                                                               SubRegIdx);
222   }
223   }
224 }
225 
226 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
227   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
228       cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
229     return N;
230 
231   const SITargetLowering& Lowering =
232       *static_cast<const SITargetLowering*>(getTargetLowering());
233 
234   // Write max value to m0 before each load operation
235 
236   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
237                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
238 
239   SDValue Glue = M0.getValue(1);
240 
241   SmallVector <SDValue, 8> Ops;
242   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
243      Ops.push_back(N->getOperand(i));
244   }
245   Ops.push_back(Glue);
246   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
247 
248   return N;
249 }
250 
251 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
252   switch (NumVectorElts) {
253   case 1:
254     return AMDGPU::SReg_32RegClassID;
255   case 2:
256     return AMDGPU::SReg_64RegClassID;
257   case 4:
258     return AMDGPU::SReg_128RegClassID;
259   case 8:
260     return AMDGPU::SReg_256RegClassID;
261   case 16:
262     return AMDGPU::SReg_512RegClassID;
263   }
264 
265   llvm_unreachable("invalid vector size");
266 }
267 
// Top-level custom selection. Handles the nodes tablegen cannot express
// (multi-result instructions, REG_SEQUENCE construction, m0 glue) and
// defers everything else to the generated matcher via SelectCode.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Local-memory atomics get the m0 initialization glued on here; plain
  // loads/stores get the same treatment in their case below.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    // Only 64-bit add/sub on SI+ is special-cased; everything else uses
    // the normal patterns.
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction directly to a REG_SEQUENCE of the 32-bit
    // elements, choosing a register class by generation and element count.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element "vector" is just a copy of the scalar into the right
    // register class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine two halves into one wide register via REG_SEQUENCE (SI+ only;
    // older generations break to the default matcher).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit constants that are not inline immediates as two
    // S_MOV_B32s recombined with a REG_SEQUENCE (SI+ only).
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores need m0 glued just like atomics; the actual
    // selection is still done by the generated matcher below.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    // Multi-result instruction; must be selected by hand.
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    // Let the SI lowering legalize operands of this target-independent node
    // before the matcher sees it.
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be foldable into a scalar BFE (SI+, i32 only).
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Anything not returned above falls through to the tablegen-generated
  // matcher.
  SelectCode(N);
}
491 
492 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
493   if (!N->readMem())
494     return false;
495   if (CbId == -1)
496     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
497 
498   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
499 }
500 
501 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
502   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
503   const Instruction *Term = BB->getTerminator();
504   return Term->getMetadata("amdgpu.uniform") ||
505          Term->getMetadata("structurizecfg.uniform");
506 }
507 
// Human-readable pass name (shown in pass-manager debug output).
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
511 
512 //===----------------------------------------------------------------------===//
513 // Complex Patterns
514 //===----------------------------------------------------------------------===//
515 
516 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
517                                                          SDValue& IntPtr) {
518   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
519     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
520                                        true);
521     return true;
522   }
523   return false;
524 }
525 
526 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
527     SDValue& BaseReg, SDValue &Offset) {
528   if (!isa<ConstantSDNode>(Addr)) {
529     BaseReg = Addr;
530     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
531     return true;
532   }
533   return false;
534 }
535 
536 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
537                                            SDValue &Offset) {
538   ConstantSDNode *IMMOffset;
539 
540   if (Addr.getOpcode() == ISD::ADD
541       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
542       && isInt<16>(IMMOffset->getZExtValue())) {
543 
544       Base = Addr.getOperand(0);
545       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
546                                          MVT::i32);
547       return true;
548   // If the pointer address is constant, we can move it to the offset field.
549   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
550              && isInt<16>(IMMOffset->getZExtValue())) {
551     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
552                                   SDLoc(CurDAG->getEntryNode()),
553                                   AMDGPU::ZERO, MVT::i32);
554     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
555                                        MVT::i32);
556     return true;
557   }
558 
559   // Default case, no offset
560   Base = Addr;
561   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
562   return true;
563 }
564 
565 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
566                                             SDValue &Offset) {
567   ConstantSDNode *C;
568   SDLoc DL(Addr);
569 
570   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
571     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
572     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
573   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
574             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
575     Base = Addr.getOperand(0);
576     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
577   } else {
578     Base = Addr;
579     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
580   }
581 
582   return true;
583 }
584 
585 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
586   SDLoc DL(N);
587   SDValue LHS = N->getOperand(0);
588   SDValue RHS = N->getOperand(1);
589 
590   bool IsAdd = (N->getOpcode() == ISD::ADD);
591 
592   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
593   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
594 
595   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
596                                        DL, MVT::i32, LHS, Sub0);
597   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
598                                        DL, MVT::i32, LHS, Sub1);
599 
600   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
601                                        DL, MVT::i32, RHS, Sub0);
602   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
603                                        DL, MVT::i32, RHS, Sub1);
604 
605   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
606   SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
607 
608   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
609   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
610 
611   SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
612   SDValue Carry(AddLo, 1);
613   SDNode *AddHi
614     = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
615                              SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
616 
617   SDValue Args[5] = {
618     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
619     SDValue(AddLo,0),
620     Sub0,
621     SDValue(AddHi,0),
622     Sub1,
623   };
624   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
625 }
626 
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Each SelectVOP3* call fills a (value, modifiers) pair; the Mods0 form
  // additionally fills the trailing clamp (Ops[6]) and omod (Ops[7]) slots.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  // Two results: the scaled value (VT) and the i1 VCC flag output.
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
647 
648 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
649                                          unsigned OffsetBits) const {
650   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
651       (OffsetBits == 8 && !isUInt<8>(Offset)))
652     return false;
653 
654   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
655       Subtarget->unsafeDSOffsetFoldingEnabled())
656     return true;
657 
658   // On Southern Islands instruction with a negative base value and an offset
659   // don't seem to work.
660   return CurDAG->SignBitIsZero(Base);
661 }
662 
// DS addressing with one base register and one 16-bit immediate offset.
// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // NOTE(review): getSExtValue() is converted to isDSOffsetLegal's
    // unsigned parameter here; negative offsets then fail the isUInt<16>
    // check inside — confirm that is the intent.
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real (machine) negation used as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
723 
// TODO: If offset is too big, put low 16-bit into offset.
// DS read2/write2 addressing: one base register plus two 8-bit offsets in
// dword (4-byte) units, covering two adjacent dwords. Always succeeds; the
// fallback is (Addr, 0, 1).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // The byte offset is emitted in dword units; the second slot is the
    // next consecutive dword.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the real (machine) negation used as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: fold it entirely into the offsets over a zero base
    // (requires the address to be dword aligned).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
797 
798 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
799   return isUInt<12>(Imm->getZExtValue());
800 }
801 
// Decompose an address into the operand fields of a MUBUF instruction:
// ptr/vaddr/soffset/offset plus the offen/idxen/addr64 and glc/slc/tfe
// flag operands.  Returns false when the subtarget prefers flat
// instructions for global accesses.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already have been set by the caller; only default when unset.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Default all address fields to zero before trying to fold the add forms.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // The constant fits the 12-bit immediate offset field.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
877 
// Match the MUBUF addr64 addressing mode: the pointer is wrapped into a
// 128-bit resource descriptor and the variable part goes in vaddr.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Succeed only when SelectMUBUF actually chose the addr64 form.
  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}
905 
906 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
907                                            SDValue &VAddr, SDValue &SOffset,
908                                            SDValue &Offset,
909                                            SDValue &SLC) const {
910   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
911   SDValue GLC, TFE;
912 
913   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
914 }
915 
916 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
917                                             SDValue &VAddr, SDValue &SOffset,
918                                             SDValue &ImmOffset) const {
919 
920   SDLoc DL(Addr);
921   MachineFunction &MF = CurDAG->getMachineFunction();
922   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
923 
924   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
925   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
926 
927   // (add n0, c1)
928   if (CurDAG->isBaseWithConstantOffset(Addr)) {
929     SDValue N0 = Addr.getOperand(0);
930     SDValue N1 = Addr.getOperand(1);
931 
932     // Offsets in vaddr must be positive.
933     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
934     if (isLegalMUBUFImmOffset(C1)) {
935       VAddr = N0;
936       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
937       return true;
938     }
939   }
940 
941   // (node)
942   VAddr = Addr;
943   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
944   return true;
945 }
946 
// Match an offset-only MUBUF access (no offen/idxen/addr64) and build the
// full resource descriptor around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only usable when SelectMUBUF needed none of the variable-address forms.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Descriptor word: default data format with an all-ones size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
974 
975 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
976                                            SDValue &Soffset, SDValue &Offset
977                                            ) const {
978   SDValue GLC, SLC, TFE;
979 
980   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
981 }
982 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
983                                            SDValue &Soffset, SDValue &Offset,
984                                            SDValue &SLC) const {
985   SDValue GLC, TFE;
986 
987   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
988 }
989 
// Split a constant buffer offset into an immediate part (<= 4095, the
// 12-bit field) and an overflow part placed in soffset.  Returns false
// when a nonzero soffset would hit the SI/CI clamping hardware bug.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows (<= 64) fit an inline constant operand; larger ones
  // need an explicit S_MOV_B32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1033 
1034 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1035                                                     SDValue &SOffset,
1036                                                     SDValue &ImmOffset) const {
1037   SDLoc DL(Offset);
1038 
1039   if (!isa<ConstantSDNode>(Offset))
1040     return false;
1041 
1042   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1043 }
1044 
// Select soffset/immoffset/voffset for a buffer intrinsic with a possibly
// variable offset operand.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    // I.e. reject the constant here (and fall back to the voffset path below)
    // only when SelectMUBUFConstant itself fails on <= CI.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    // Fold a non-negative constant part into soffset/immoffset and keep the
    // variable part in voffset.
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Default: the entire offset goes in voffset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1078 
1079 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1080                                     SDValue &VAddr,
1081                                     SDValue &SLC,
1082                                     SDValue &TFE) const {
1083   VAddr = Addr;
1084   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1085   return true;
1086 }
1087 
1088 ///
1089 /// \param EncodedOffset This is the immediate value that will be encoded
1090 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1091 ///        will be in units of dwords and on VI+ it will be units of bytes.
1092 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1093                                  int64_t EncodedOffset) {
1094   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1095      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1096 }
1097 
1098 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1099                                           SDValue &Offset, bool &Imm) const {
1100 
1101   // FIXME: Handle non-constant offsets.
1102   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1103   if (!C)
1104     return false;
1105 
1106   SDLoc SL(ByteOffsetNode);
1107   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1108   int64_t ByteOffset = C->getSExtValue();
1109   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1110       ByteOffset >> 2 : ByteOffset;
1111 
1112   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1113     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1114     Imm = true;
1115     return true;
1116   }
1117 
1118   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1119     return false;
1120 
1121   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1122     // 32-bit Immediates are supported on Sea Islands.
1123     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1124   } else {
1125     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1126     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1127                                             C32Bit), 0);
1128   }
1129   Imm = false;
1130   return true;
1131 }
1132 
1133 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1134                                      SDValue &Offset, bool &Imm) const {
1135 
1136   SDLoc SL(Addr);
1137   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1138     SDValue N0 = Addr.getOperand(0);
1139     SDValue N1 = Addr.getOperand(1);
1140 
1141     if (SelectSMRDOffset(N1, Offset, Imm)) {
1142       SBase = N0;
1143       return true;
1144     }
1145   }
1146   SBase = Addr;
1147   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1148   Imm = true;
1149   return true;
1150 }
1151 
1152 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1153                                        SDValue &Offset) const {
1154   bool Imm;
1155   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1156 }
1157 
1158 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1159                                          SDValue &Offset) const {
1160 
1161   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1162     return false;
1163 
1164   bool Imm;
1165   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1166     return false;
1167 
1168   return !Imm && isa<ConstantSDNode>(Offset);
1169 }
1170 
1171 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1172                                         SDValue &Offset) const {
1173   bool Imm;
1174   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1175          !isa<ConstantSDNode>(Offset);
1176 }
1177 
1178 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1179                                              SDValue &Offset) const {
1180   bool Imm;
1181   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1182 }
1183 
1184 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1185                                                SDValue &Offset) const {
1186   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1187     return false;
1188 
1189   bool Imm;
1190   if (!SelectSMRDOffset(Addr, Offset, Imm))
1191     return false;
1192 
1193   return !Imm && isa<ConstantSDNode>(Offset);
1194 }
1195 
1196 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1197                                               SDValue &Offset) const {
1198   bool Imm;
1199   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1200          !isa<ConstantSDNode>(Offset);
1201 }
1202 
1203 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1204                                             SDValue &Base,
1205                                             SDValue &Offset) const {
1206   SDLoc DL(Index);
1207 
1208   if (CurDAG->isBaseWithConstantOffset(Index)) {
1209     SDValue N0 = Index.getOperand(0);
1210     SDValue N1 = Index.getOperand(1);
1211     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1212 
1213     // (add n0, c0)
1214     Base = N0;
1215     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1216     return true;
1217   }
1218 
1219   if (isa<ConstantSDNode>(Index))
1220     return false;
1221 
1222   Base = Index;
1223   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1224   return true;
1225 }
1226 
1227 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1228                                      SDValue Val, uint32_t Offset,
1229                                      uint32_t Width) {
1230   // Transformation function, pack the offset and width of a BFE into
1231   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1232   // source, bits [5:0] contain the offset and bits [22:16] the width.
1233   uint32_t PackedVal = Offset | (Width << 16);
1234   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1235 
1236   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1237 }
1238 
1239 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1240   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1241   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1242   // Predicate: 0 < b <= c < 32
1243 
1244   const SDValue &Shl = N->getOperand(0);
1245   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1246   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1247 
1248   if (B && C) {
1249     uint32_t BVal = B->getZExtValue();
1250     uint32_t CVal = C->getZExtValue();
1251 
1252     if (0 < BVal && BVal <= CVal && CVal < 32) {
1253       bool Signed = N->getOpcode() == ISD::SRA;
1254       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1255 
1256       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1257                               32 - CVal));
1258       return;
1259     }
1260   }
1261   SelectCode(N);
1262 }
1263 
// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG nodes as an S_BFE bitfield
// extract; falls through to the generated matcher when no pattern applies.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of the extracted field is the number of mask bits.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask first so only the surviving bits are counted.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // Field width comes from the type being sign-extended from.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
1339 
1340 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1341   SDValue Cond = N->getOperand(1);
1342 
1343   if (isCBranchSCC(N)) {
1344     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1345     SelectCode(N);
1346     return;
1347   }
1348 
1349   // The result of VOPC instructions is or'd against ~EXEC before it is
1350   // written to vcc or another SGPR.  This means that the value '1' is always
1351   // written to the corresponding bit for results that are masked.  In order
1352   // to correctly check against vccz, we need to and VCC with the EXEC
1353   // register in order to clear the value from the masked bits.
1354 
1355   SDLoc SL(N);
1356 
1357   SDNode *MaskedCond =
1358         CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1359                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1360                                Cond);
1361   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
1362                                      SDValue(MaskedCond, 0),
1363                                      SDValue()); // Passing SDValue() adds a
1364                                                  // glue output.
1365   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1366                        N->getOperand(2), // Basic Block
1367                        VCC.getValue(0),  // Chain
1368                        VCC.getValue(1)); // Glue
1369   return;
1370 }
1371 
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
//
// Selects an atomic cmpxchg as a MUBUF buffer atomic (addr64 form when the
// subtarget has it, otherwise the offset form) and extracts the low
// sub-register of the returned pair as the result value.  Flat-address
// cmpxchg is deferred to the generated matcher.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 encoding when the subtarget supports it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the offset-only addressing mode.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched: let tablegen handle it.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand to the new machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The atomic returns the old value pair; the result is its low half.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1437 
1438 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1439                                         SDValue &SrcMods) const {
1440 
1441   unsigned Mods = 0;
1442 
1443   Src = In;
1444 
1445   if (Src.getOpcode() == ISD::FNEG) {
1446     Mods |= SISrcMods::NEG;
1447     Src = Src.getOperand(0);
1448   }
1449 
1450   if (Src.getOpcode() == ISD::FABS) {
1451     Mods |= SISrcMods::ABS;
1452     Src = Src.getOperand(0);
1453   }
1454 
1455   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1456 
1457   return true;
1458 }
1459 
1460 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1461                                          SDValue &SrcMods) const {
1462   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1463   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1464 }
1465 
1466 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1467                                          SDValue &SrcMods, SDValue &Clamp,
1468                                          SDValue &Omod) const {
1469   SDLoc DL(In);
1470   // FIXME: Handle Clamp and Omod
1471   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1472   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1473 
1474   return SelectVOP3Mods(In, Src, SrcMods);
1475 }
1476 
1477 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1478                                            SDValue &SrcMods, SDValue &Clamp,
1479                                            SDValue &Omod) const {
1480   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1481 
1482   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1483                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1484                 cast<ConstantSDNode>(Omod)->isNullValue();
1485 }
1486 
1487 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1488                                               SDValue &SrcMods,
1489                                               SDValue &Omod) const {
1490   // FIXME: Handle Omod
1491   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1492 
1493   return SelectVOP3Mods(In, Src, SrcMods);
1494 }
1495 
1496 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1497                                                    SDValue &SrcMods,
1498                                                    SDValue &Clamp,
1499                                                    SDValue &Omod) const {
1500   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1501   return SelectVOP3Mods(In, Src, SrcMods);
1502 }
1503 
1504 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
1505   MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
1506 
1507   // Handle the perverse case where a frame index is being stored. We don't
1508   // want to see multiple frame index operands on the same instruction since
1509   // it complicates things and violates some assumptions about frame index
1510   // lowering.
1511   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1512        I != E; ++I) {
1513     SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
1514 
1515     // It's possible that we have a frame index defined in the function that
1516     // isn't used in this block.
1517     if (FI.use_empty())
1518       continue;
1519 
1520     // Skip over the AssertZext inserted during lowering.
1521     SDValue EffectiveFI = FI;
1522     auto It = FI->use_begin();
1523     if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
1524       EffectiveFI = SDValue(*It, 0);
1525       It = EffectiveFI->use_begin();
1526     }
1527 
1528     for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
1529       SDUse &Use = It.getUse();
1530       SDNode *User = Use.getUser();
1531       unsigned OpIdx = It.getOperandNo();
1532       ++It;
1533 
1534       if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
1535         unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
1536         if (OpIdx == PtrIdx)
1537           continue;
1538 
1539         unsigned OpN = M->getNumOperands();
1540         SDValue NewOps[8];
1541 
1542         assert(OpN < array_lengthof(NewOps));
1543         for (unsigned Op = 0; Op != OpN; ++Op) {
1544           if (Op != OpIdx) {
1545             NewOps[Op] = M->getOperand(Op);
1546             continue;
1547           }
1548 
1549           MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1550                                                       SDLoc(M), MVT::i32, FI);
1551           NewOps[Op] = SDValue(Mov, 0);
1552         }
1553 
1554         CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
1555       }
1556     }
1557   }
1558 }
1559 
// Run target post-selection folding over all machine nodes, repeating
// until a pass makes no further changes.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      // A distinct result node indicates the fold fired; rewire users and
      // go around again.
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes left dead by the folds before the next iteration.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1581