1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
//===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
// Forward declaration: R600InstrInfo is only referenced through pointers in
// the operand-folding helper declarations of AMDGPUDAGToDAGISel below.
namespace llvm {
class R600InstrInfo;
}
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 /// AMDGPU specific code to select AMDGPU machine instructions for
42 /// SelectionDAG operations.
43 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
44   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
45   // make the right decision when generating code for different targets.
46   const AMDGPUSubtarget *Subtarget;
47 
48 public:
49   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
50       : SelectionDAGISel(TM, OptLevel) {}
51 
52   virtual ~AMDGPUDAGToDAGISel();
53   bool runOnMachineFunction(MachineFunction &MF) override;
54   void Select(SDNode *N) override;
55   StringRef getPassName() const override;
56   void PostprocessISelDAG() override;
57 
58 private:
59   SDValue foldFrameIndex(SDValue N) const;
60   bool isInlineImmediate(const SDNode *N) const;
61   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
62                    const R600InstrInfo *TII);
63   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
64   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
65 
66   bool isConstantLoad(const MemSDNode *N, int cbID) const;
67   bool isUniformBr(const SDNode *N) const;
68 
69   SDNode *glueCopyToM0(SDNode *N) const;
70 
71   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
72   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
73   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
74                                        SDValue& Offset);
75   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
76   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
77   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
78                        unsigned OffsetBits) const;
79   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
80   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
81                                  SDValue &Offset1) const;
82   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
83                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
84                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
85                    SDValue &TFE) const;
86   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
87                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
88                          SDValue &SLC, SDValue &TFE) const;
89   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
90                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
91                          SDValue &SLC) const;
92   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
93                           SDValue &SOffset, SDValue &ImmOffset) const;
94   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
95                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
96                          SDValue &TFE) const;
97   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
98                          SDValue &Offset, SDValue &SLC) const;
99   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
100                          SDValue &Offset) const;
101   bool SelectMUBUFConstant(SDValue Constant,
102                            SDValue &SOffset,
103                            SDValue &ImmOffset) const;
104   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
105                                   SDValue &ImmOffset) const;
106   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
107                                    SDValue &ImmOffset, SDValue &VOffset) const;
108 
109   bool SelectFlat(SDValue Addr, SDValue &VAddr,
110                   SDValue &SLC, SDValue &TFE) const;
111 
112   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
113                         bool &Imm) const;
114   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
115                   bool &Imm) const;
116   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
117   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
118   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
119   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
120   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
121   bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
122   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
123   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
124   bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
125   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
126                        SDValue &Clamp, SDValue &Omod) const;
127   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
128                          SDValue &Clamp, SDValue &Omod) const;
129 
130   bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
131                             SDValue &Omod) const;
132   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
133                                  SDValue &Clamp,
134                                  SDValue &Omod) const;
135 
136   void SelectADD_SUB_I64(SDNode *N);
137   void SelectDIV_SCALE(SDNode *N);
138   void SelectFMA_W_CHAIN(SDNode *N);
139   void SelectFMUL_W_CHAIN(SDNode *N);
140 
141   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
142                    uint32_t Offset, uint32_t Width);
143   void SelectS_BFEFromShifts(SDNode *N);
144   void SelectS_BFE(SDNode *N);
145   bool isCBranchSCC(const SDNode *N) const;
146   void SelectBRCOND(SDNode *N);
147   void SelectATOMIC_CMP_SWAP(SDNode *N);
148 
149   // Include the pieces autogenerated from the target description.
150 #include "AMDGPUGenDAGISel.inc"
151 };
152 }  // end anonymous namespace
153 
154 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
155 // DAG, ready for instruction scheduling.
156 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
157                                         CodeGenOpt::Level OptLevel) {
158   return new AMDGPUDAGToDAGISel(TM, OptLevel);
159 }
160 
161 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
162   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
163   return SelectionDAGISel::runOnMachineFunction(MF);
164 }
165 
166 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
167 }
168 
169 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
170   const SIInstrInfo *TII
171     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
172 
173   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
174     return TII->isInlineConstant(C->getAPIntValue());
175 
176   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
177     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
178 
179   return false;
180 }
181 
182 /// \brief Determine the register class for \p OpNo
183 /// \returns The register class of the virtual register that will be used for
184 /// the given operand number \OpNo or NULL if the register class cannot be
185 /// determined.
186 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
187                                                           unsigned OpNo) const {
188   if (!N->isMachineOpcode()) {
189     if (N->getOpcode() == ISD::CopyToReg) {
190       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
191       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
192         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
193         return MRI.getRegClass(Reg);
194       }
195 
196       const SIRegisterInfo *TRI
197         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
198       return TRI->getPhysRegClass(Reg);
199     }
200 
201     return nullptr;
202   }
203 
204   switch (N->getMachineOpcode()) {
205   default: {
206     const MCInstrDesc &Desc =
207         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
208     unsigned OpIdx = Desc.getNumDefs() + OpNo;
209     if (OpIdx >= Desc.getNumOperands())
210       return nullptr;
211     int RegClass = Desc.OpInfo[OpIdx].RegClass;
212     if (RegClass == -1)
213       return nullptr;
214 
215     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
216   }
217   case AMDGPU::REG_SEQUENCE: {
218     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
219     const TargetRegisterClass *SuperRC =
220         Subtarget->getRegisterInfo()->getRegClass(RCID);
221 
222     SDValue SubRegOp = N->getOperand(OpNo + 1);
223     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
224     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
225                                                               SubRegIdx);
226   }
227   }
228 }
229 
230 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
231   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
232       cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
233     return N;
234 
235   const SITargetLowering& Lowering =
236       *static_cast<const SITargetLowering*>(getTargetLowering());
237 
238   // Write max value to m0 before each load operation
239 
240   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
241                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
242 
243   SDValue Glue = M0.getValue(1);
244 
245   SmallVector <SDValue, 8> Ops;
246   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
247      Ops.push_back(N->getOperand(i));
248   }
249   Ops.push_back(Glue);
250   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
251 
252   return N;
253 }
254 
255 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
256   switch (NumVectorElts) {
257   case 1:
258     return AMDGPU::SReg_32_XM0RegClassID;
259   case 2:
260     return AMDGPU::SReg_64RegClassID;
261   case 4:
262     return AMDGPU::SReg_128RegClassID;
263   case 8:
264     return AMDGPU::SReg_256RegClassID;
265   case 16:
266     return AMDGPU::SReg_512RegClassID;
267   }
268 
269   llvm_unreachable("invalid vector size");
270 }
271 
/// Main selection hook: pick machine instructions for \p N.
///
/// Opcodes that need hand-written handling are dealt with in the switch below
/// and return early once the node has been replaced. Every case that `break`s
/// (and every opcode without a case) falls through to the tablegen-generated
/// SelectCode() at the bottom.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomic nodes (generic and the AMDGPU inc/dec ones) may need M0 set up
  // first; glueCopyToM0 decides whether a copy is actually required.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    // Only the 64-bit forms on Southern Islands or newer are handled here.
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  // Vector construction is selected to COPY_TO_REGCLASS (single element) or
  // REG_SEQUENCE (multiple elements).
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just the scalar placed in the chosen class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      // NOTE(review): a RegisterSDNode operand abandons the REG_SEQUENCE
      // path; the node is then left to SelectCode() via the break below.
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      // All missing lanes share a single IMPLICIT_DEF value.
      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  // BUILD_PAIR of two halves becomes a two-entry REG_SEQUENCE (i64 or i128),
  // except on Northern Islands and older where it is left to SelectCode().
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  // 64-bit constants that are not inline immediates are materialized as two
  // S_MOV_B32 halves joined by a REG_SEQUENCE (Southern Islands or newer).
  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    // Low 32 bits go to sub0, high 32 bits to sub1.
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  // Loads and stores only get the optional M0 glue here; the instruction
  // itself is chosen by SelectCode().
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  // CopyToReg is first passed to the SI lowering hook, then selected normally.
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  // 32-bit mask/shift/sign-extend nodes are handed to SelectS_BFE on
  // Southern Islands or newer.
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Everything not fully handled above goes to the generated matcher.
  SelectCode(N);
}
508 
509 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
510   if (!N->readMem())
511     return false;
512   if (CbId == -1)
513     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
514 
515   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
516 }
517 
518 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
519   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
520   const Instruction *Term = BB->getTerminator();
521   return Term->getMetadata("amdgpu.uniform") ||
522          Term->getMetadata("structurizecfg.uniform");
523 }
524 
525 StringRef AMDGPUDAGToDAGISel::getPassName() const {
526   return "AMDGPU DAG->DAG Pattern Instruction Selection";
527 }
528 
529 //===----------------------------------------------------------------------===//
530 // Complex Patterns
531 //===----------------------------------------------------------------------===//
532 
533 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
534                                                          SDValue& IntPtr) {
535   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
536     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
537                                        true);
538     return true;
539   }
540   return false;
541 }
542 
543 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
544     SDValue& BaseReg, SDValue &Offset) {
545   if (!isa<ConstantSDNode>(Addr)) {
546     BaseReg = Addr;
547     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
548     return true;
549   }
550   return false;
551 }
552 
553 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
554                                            SDValue &Offset) {
555   ConstantSDNode *IMMOffset;
556 
557   if (Addr.getOpcode() == ISD::ADD
558       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
559       && isInt<16>(IMMOffset->getZExtValue())) {
560 
561       Base = Addr.getOperand(0);
562       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
563                                          MVT::i32);
564       return true;
565   // If the pointer address is constant, we can move it to the offset field.
566   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
567              && isInt<16>(IMMOffset->getZExtValue())) {
568     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
569                                   SDLoc(CurDAG->getEntryNode()),
570                                   AMDGPU::ZERO, MVT::i32);
571     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
572                                        MVT::i32);
573     return true;
574   }
575 
576   // Default case, no offset
577   Base = Addr;
578   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
579   return true;
580 }
581 
582 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
583                                             SDValue &Offset) {
584   ConstantSDNode *C;
585   SDLoc DL(Addr);
586 
587   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
588     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
589     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
590   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
591             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
592     Base = Addr.getOperand(0);
593     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
594   } else {
595     Base = Addr;
596     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
597   }
598 
599   return true;
600 }
601 
602 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
603   SDLoc DL(N);
604   SDValue LHS = N->getOperand(0);
605   SDValue RHS = N->getOperand(1);
606 
607   unsigned Opcode = N->getOpcode();
608   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
609   bool ProduceCarry =
610       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
611   bool IsAdd =
612       (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);
613 
614   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
615   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
616 
617   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
618                                        DL, MVT::i32, LHS, Sub0);
619   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
620                                        DL, MVT::i32, LHS, Sub1);
621 
622   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
623                                        DL, MVT::i32, RHS, Sub0);
624   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
625                                        DL, MVT::i32, RHS, Sub1);
626 
627   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
628 
629   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
630   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
631 
632   SDNode *AddLo;
633   if (!ConsumeCarry) {
634     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
635     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
636   } else {
637     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
638     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
639   }
640   SDValue AddHiArgs[] = {
641     SDValue(Hi0, 0),
642     SDValue(Hi1, 0),
643     SDValue(AddLo, 1)
644   };
645   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
646 
647   SDValue RegSequenceArgs[] = {
648     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
649     SDValue(AddLo,0),
650     Sub0,
651     SDValue(AddHi,0),
652     Sub1,
653   };
654   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
655                                                MVT::i64, RegSequenceArgs);
656 
657   if (ProduceCarry) {
658     // Replace the carry-use
659     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
660   }
661 
662   // Replace the remaining uses.
663   CurDAG->ReplaceAllUsesWith(N, RegSequence);
664   CurDAG->RemoveDeadNode(N);
665 }
666 
667 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
668   SDLoc SL(N);
669   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
670   SDValue Ops[10];
671 
672   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
673   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
674   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
675   Ops[8] = N->getOperand(0);
676   Ops[9] = N->getOperand(4);
677 
678   CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
679 }
680 
681 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
682   SDLoc SL(N);
683   //	src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
684   SDValue Ops[8];
685 
686   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
687   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
688   Ops[6] = N->getOperand(0);
689   Ops[7] = N->getOperand(3);
690 
691   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
692 }
693 
694 // We need to handle this here because tablegen doesn't support matching
695 // instructions with multiple outputs.
696 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
697   SDLoc SL(N);
698   EVT VT = N->getValueType(0);
699 
700   assert(VT == MVT::f32 || VT == MVT::f64);
701 
702   unsigned Opc
703     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
704 
705   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
706   // omod
707   SDValue Ops[8];
708 
709   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
710   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
711   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
712   CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
713 }
714 
715 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
716                                          unsigned OffsetBits) const {
717   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
718       (OffsetBits == 8 && !isUInt<8>(Offset)))
719     return false;
720 
721   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
722       Subtarget->unsafeDSOffsetFoldingEnabled())
723     return true;
724 
725   // On Southern Islands instruction with a negative base value and an offset
726   // don't seem to work.
727   return CurDAG->SignBitIsZero(Base);
728 }
729 
730 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
731                                               SDValue &Offset) const {
732   SDLoc DL(Addr);
733   if (CurDAG->isBaseWithConstantOffset(Addr)) {
734     SDValue N0 = Addr.getOperand(0);
735     SDValue N1 = Addr.getOperand(1);
736     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
737     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
738       // (add n0, c0)
739       Base = N0;
740       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
741       return true;
742     }
743   } else if (Addr.getOpcode() == ISD::SUB) {
744     // sub C, x -> add (sub 0, x), C
745     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
746       int64_t ByteOffset = C->getSExtValue();
747       if (isUInt<16>(ByteOffset)) {
748         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
749 
750         // XXX - This is kind of hacky. Create a dummy sub node so we can check
751         // the known bits in isDSOffsetLegal. We need to emit the selected node
752         // here, so this is thrown away.
753         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
754                                       Zero, Addr.getOperand(1));
755 
756         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
757           MachineSDNode *MachineSub
758             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
759                                      Zero, Addr.getOperand(1));
760 
761           Base = SDValue(MachineSub, 0);
762           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
763           return true;
764         }
765       }
766     }
767   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
768     // If we have a constant address, prefer to put the constant into the
769     // offset. This can save moves to load the constant address since multiple
770     // operations can share the zero base address register, and enables merging
771     // into read2 / write2 instructions.
772 
773     SDLoc DL(Addr);
774 
775     if (isUInt<16>(CAddr->getZExtValue())) {
776       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
777       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
778                                  DL, MVT::i32, Zero);
779       Base = SDValue(MovZero, 0);
780       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
781       return true;
782     }
783   }
784 
785   // default case
786   Base = Addr;
787   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
788   return true;
789 }
790 
791 // TODO: If offset is too big, put low 16-bit into offset.
792 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
793                                                    SDValue &Offset0,
794                                                    SDValue &Offset1) const {
795   SDLoc DL(Addr);
796 
797   if (CurDAG->isBaseWithConstantOffset(Addr)) {
798     SDValue N0 = Addr.getOperand(0);
799     SDValue N1 = Addr.getOperand(1);
800     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
801     unsigned DWordOffset0 = C1->getZExtValue() / 4;
802     unsigned DWordOffset1 = DWordOffset0 + 1;
803     // (add n0, c0)
804     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
805       Base = N0;
806       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
807       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
808       return true;
809     }
810   } else if (Addr.getOpcode() == ISD::SUB) {
811     // sub C, x -> add (sub 0, x), C
812     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
813       unsigned DWordOffset0 = C->getZExtValue() / 4;
814       unsigned DWordOffset1 = DWordOffset0 + 1;
815 
816       if (isUInt<8>(DWordOffset0)) {
817         SDLoc DL(Addr);
818         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
819 
820         // XXX - This is kind of hacky. Create a dummy sub node so we can check
821         // the known bits in isDSOffsetLegal. We need to emit the selected node
822         // here, so this is thrown away.
823         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
824                                       Zero, Addr.getOperand(1));
825 
826         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
827           MachineSDNode *MachineSub
828             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
829                                      Zero, Addr.getOperand(1));
830 
831           Base = SDValue(MachineSub, 0);
832           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
833           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
834           return true;
835         }
836       }
837     }
838   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
839     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
840     unsigned DWordOffset1 = DWordOffset0 + 1;
841     assert(4 * DWordOffset0 == CAddr->getZExtValue());
842 
843     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
844       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
845       MachineSDNode *MovZero
846         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
847                                  DL, MVT::i32, Zero);
848       Base = SDValue(MovZero, 0);
849       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
850       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
851       return true;
852     }
853   }
854 
855   // default case
856 
857   // FIXME: This is broken on SI where we still need to check if the base
858   // pointer is positive here.
859   Base = Addr;
860   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
861   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
862   return true;
863 }
864 
865 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
866   return isUInt<12>(Imm->getZExtValue());
867 }
868 
869 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
870                                      SDValue &VAddr, SDValue &SOffset,
871                                      SDValue &Offset, SDValue &Offen,
872                                      SDValue &Idxen, SDValue &Addr64,
873                                      SDValue &GLC, SDValue &SLC,
874                                      SDValue &TFE) const {
875   // Subtarget prefers to use flat instruction
876   if (Subtarget->useFlatForGlobal())
877     return false;
878 
879   SDLoc DL(Addr);
880 
881   if (!GLC.getNode())
882     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
883   if (!SLC.getNode())
884     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
885   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
886 
887   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
888   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
889   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
890   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
891 
892   if (CurDAG->isBaseWithConstantOffset(Addr)) {
893     SDValue N0 = Addr.getOperand(0);
894     SDValue N1 = Addr.getOperand(1);
895     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
896 
897     if (N0.getOpcode() == ISD::ADD) {
898       // (add (add N2, N3), C1) -> addr64
899       SDValue N2 = N0.getOperand(0);
900       SDValue N3 = N0.getOperand(1);
901       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
902       Ptr = N2;
903       VAddr = N3;
904     } else {
905 
906       // (add N0, C1) -> offset
907       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
908       Ptr = N0;
909     }
910 
911     if (isLegalMUBUFImmOffset(C1)) {
912       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
913       return true;
914     }
915 
916     if (isUInt<32>(C1->getZExtValue())) {
917       // Illegal offset, store it in soffset.
918       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
919       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
920                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
921                         0);
922       return true;
923     }
924   }
925 
926   if (Addr.getOpcode() == ISD::ADD) {
927     // (add N0, N1) -> addr64
928     SDValue N0 = Addr.getOperand(0);
929     SDValue N1 = Addr.getOperand(1);
930     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
931     Ptr = N0;
932     VAddr = N1;
933     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
934     return true;
935   }
936 
937   // default case -> offset
938   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
939   Ptr = Addr;
940   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
941 
942   return true;
943 }
944 
945 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
946                                            SDValue &VAddr, SDValue &SOffset,
947                                            SDValue &Offset, SDValue &GLC,
948                                            SDValue &SLC, SDValue &TFE) const {
949   SDValue Ptr, Offen, Idxen, Addr64;
950 
951   // addr64 bit was removed for volcanic islands.
952   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
953     return false;
954 
955   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
956               GLC, SLC, TFE))
957     return false;
958 
959   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
960   if (C->getSExtValue()) {
961     SDLoc DL(Addr);
962 
963     const SITargetLowering& Lowering =
964       *static_cast<const SITargetLowering*>(getTargetLowering());
965 
966     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
967     return true;
968   }
969 
970   return false;
971 }
972 
973 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
974                                            SDValue &VAddr, SDValue &SOffset,
975                                            SDValue &Offset,
976                                            SDValue &SLC) const {
977   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
978   SDValue GLC, TFE;
979 
980   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
981 }
982 
983 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
984   if (auto FI = dyn_cast<FrameIndexSDNode>(N))
985     return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
986   return N;
987 }
988 
989 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
990                                             SDValue &VAddr, SDValue &SOffset,
991                                             SDValue &ImmOffset) const {
992 
993   SDLoc DL(Addr);
994   MachineFunction &MF = CurDAG->getMachineFunction();
995   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
996 
997   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
998   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
999 
1000   // (add n0, c1)
1001   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1002     SDValue N0 = Addr.getOperand(0);
1003     SDValue N1 = Addr.getOperand(1);
1004 
1005     // Offsets in vaddr must be positive.
1006     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1007     if (isLegalMUBUFImmOffset(C1)) {
1008       VAddr = foldFrameIndex(N0);
1009       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1010       return true;
1011     }
1012   }
1013 
1014   // (node)
1015   VAddr = foldFrameIndex(Addr);
1016   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1017   return true;
1018 }
1019 
1020 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1021                                            SDValue &SOffset, SDValue &Offset,
1022                                            SDValue &GLC, SDValue &SLC,
1023                                            SDValue &TFE) const {
1024   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1025   const SIInstrInfo *TII =
1026     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1027 
1028   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1029               GLC, SLC, TFE))
1030     return false;
1031 
1032   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1033       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1034       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1035     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1036                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1037     SDLoc DL(Addr);
1038 
1039     const SITargetLowering& Lowering =
1040       *static_cast<const SITargetLowering*>(getTargetLowering());
1041 
1042     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1043     return true;
1044   }
1045   return false;
1046 }
1047 
1048 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1049                                            SDValue &Soffset, SDValue &Offset
1050                                            ) const {
1051   SDValue GLC, SLC, TFE;
1052 
1053   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1054 }
1055 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1056                                            SDValue &Soffset, SDValue &Offset,
1057                                            SDValue &SLC) const {
1058   SDValue GLC, TFE;
1059 
1060   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1061 }
1062 
1063 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1064                                              SDValue &SOffset,
1065                                              SDValue &ImmOffset) const {
1066   SDLoc DL(Constant);
1067   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1068   uint32_t Overflow = 0;
1069 
1070   if (Imm >= 4096) {
1071     if (Imm <= 4095 + 64) {
1072       // Use an SOffset inline constant for 1..64
1073       Overflow = Imm - 4095;
1074       Imm = 4095;
1075     } else {
1076       // Try to keep the same value in SOffset for adjacent loads, so that
1077       // the corresponding register contents can be re-used.
1078       //
1079       // Load values with all low-bits set into SOffset, so that a larger
1080       // range of values can be covered using s_movk_i32
1081       uint32_t High = (Imm + 1) & ~4095;
1082       uint32_t Low = (Imm + 1) & 4095;
1083       Imm = Low;
1084       Overflow = High - 1;
1085     }
1086   }
1087 
1088   // There is a hardware bug in SI and CI which prevents address clamping in
1089   // MUBUF instructions from working correctly with SOffsets. The immediate
1090   // offset is unaffected.
1091   if (Overflow > 0 &&
1092       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1093     return false;
1094 
1095   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1096 
1097   if (Overflow <= 64)
1098     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1099   else
1100     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1101                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1102                       0);
1103 
1104   return true;
1105 }
1106 
1107 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1108                                                     SDValue &SOffset,
1109                                                     SDValue &ImmOffset) const {
1110   SDLoc DL(Offset);
1111 
1112   if (!isa<ConstantSDNode>(Offset))
1113     return false;
1114 
1115   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1116 }
1117 
1118 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1119                                                      SDValue &SOffset,
1120                                                      SDValue &ImmOffset,
1121                                                      SDValue &VOffset) const {
1122   SDLoc DL(Offset);
1123 
1124   // Don't generate an unnecessary voffset for constant offsets.
1125   if (isa<ConstantSDNode>(Offset)) {
1126     SDValue Tmp1, Tmp2;
1127 
1128     // When necessary, use a voffset in <= CI anyway to work around a hardware
1129     // bug.
1130     if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
1131         SelectMUBUFConstant(Offset, Tmp1, Tmp2))
1132       return false;
1133   }
1134 
1135   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1136     SDValue N0 = Offset.getOperand(0);
1137     SDValue N1 = Offset.getOperand(1);
1138     if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
1139         SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
1140       VOffset = N0;
1141       return true;
1142     }
1143   }
1144 
1145   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1146   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1147   VOffset = Offset;
1148 
1149   return true;
1150 }
1151 
1152 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1153                                     SDValue &VAddr,
1154                                     SDValue &SLC,
1155                                     SDValue &TFE) const {
1156   VAddr = Addr;
1157   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1158   return true;
1159 }
1160 
1161 ///
1162 /// \param EncodedOffset This is the immediate value that will be encoded
1163 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1164 ///        will be in units of dwords and on VI+ it will be units of bytes.
1165 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1166                                  int64_t EncodedOffset) {
1167   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1168      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1169 }
1170 
1171 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1172                                           SDValue &Offset, bool &Imm) const {
1173 
1174   // FIXME: Handle non-constant offsets.
1175   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1176   if (!C)
1177     return false;
1178 
1179   SDLoc SL(ByteOffsetNode);
1180   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1181   int64_t ByteOffset = C->getSExtValue();
1182   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1183       ByteOffset >> 2 : ByteOffset;
1184 
1185   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1186     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1187     Imm = true;
1188     return true;
1189   }
1190 
1191   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1192     return false;
1193 
1194   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1195     // 32-bit Immediates are supported on Sea Islands.
1196     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1197   } else {
1198     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1199     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1200                                             C32Bit), 0);
1201   }
1202   Imm = false;
1203   return true;
1204 }
1205 
1206 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1207                                      SDValue &Offset, bool &Imm) const {
1208 
1209   SDLoc SL(Addr);
1210   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1211     SDValue N0 = Addr.getOperand(0);
1212     SDValue N1 = Addr.getOperand(1);
1213 
1214     if (SelectSMRDOffset(N1, Offset, Imm)) {
1215       SBase = N0;
1216       return true;
1217     }
1218   }
1219   SBase = Addr;
1220   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1221   Imm = true;
1222   return true;
1223 }
1224 
1225 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1226                                        SDValue &Offset) const {
1227   bool Imm;
1228   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1229 }
1230 
1231 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1232                                          SDValue &Offset) const {
1233 
1234   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1235     return false;
1236 
1237   bool Imm;
1238   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1239     return false;
1240 
1241   return !Imm && isa<ConstantSDNode>(Offset);
1242 }
1243 
1244 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1245                                         SDValue &Offset) const {
1246   bool Imm;
1247   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1248          !isa<ConstantSDNode>(Offset);
1249 }
1250 
1251 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1252                                              SDValue &Offset) const {
1253   bool Imm;
1254   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1255 }
1256 
1257 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1258                                                SDValue &Offset) const {
1259   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1260     return false;
1261 
1262   bool Imm;
1263   if (!SelectSMRDOffset(Addr, Offset, Imm))
1264     return false;
1265 
1266   return !Imm && isa<ConstantSDNode>(Offset);
1267 }
1268 
1269 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1270                                               SDValue &Offset) const {
1271   bool Imm;
1272   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1273          !isa<ConstantSDNode>(Offset);
1274 }
1275 
1276 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1277                                             SDValue &Base,
1278                                             SDValue &Offset) const {
1279   SDLoc DL(Index);
1280 
1281   if (CurDAG->isBaseWithConstantOffset(Index)) {
1282     SDValue N0 = Index.getOperand(0);
1283     SDValue N1 = Index.getOperand(1);
1284     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1285 
1286     // (add n0, c0)
1287     Base = N0;
1288     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1289     return true;
1290   }
1291 
1292   if (isa<ConstantSDNode>(Index))
1293     return false;
1294 
1295   Base = Index;
1296   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1297   return true;
1298 }
1299 
1300 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1301                                      SDValue Val, uint32_t Offset,
1302                                      uint32_t Width) {
1303   // Transformation function, pack the offset and width of a BFE into
1304   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1305   // source, bits [5:0] contain the offset and bits [22:16] the width.
1306   uint32_t PackedVal = Offset | (Width << 16);
1307   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1308 
1309   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1310 }
1311 
1312 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1313   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1314   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1315   // Predicate: 0 < b <= c < 32
1316 
1317   const SDValue &Shl = N->getOperand(0);
1318   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1319   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1320 
1321   if (B && C) {
1322     uint32_t BVal = B->getZExtValue();
1323     uint32_t CVal = C->getZExtValue();
1324 
1325     if (0 < BVal && BVal <= CVal && CVal < 32) {
1326       bool Signed = N->getOpcode() == ISD::SRA;
1327       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1328 
1329       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1330                               32 - CVal));
1331       return;
1332     }
1333   }
1334   SelectCode(N);
1335 }
1336 
1337 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1338   switch (N->getOpcode()) {
1339   case ISD::AND:
1340     if (N->getOperand(0).getOpcode() == ISD::SRL) {
1341       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1342       // Predicate: isMask(mask)
1343       const SDValue &Srl = N->getOperand(0);
1344       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1345       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1346 
1347       if (Shift && Mask) {
1348         uint32_t ShiftVal = Shift->getZExtValue();
1349         uint32_t MaskVal = Mask->getZExtValue();
1350 
1351         if (isMask_32(MaskVal)) {
1352           uint32_t WidthVal = countPopulation(MaskVal);
1353 
1354           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1355                                   Srl.getOperand(0), ShiftVal, WidthVal));
1356           return;
1357         }
1358       }
1359     }
1360     break;
1361   case ISD::SRL:
1362     if (N->getOperand(0).getOpcode() == ISD::AND) {
1363       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1364       // Predicate: isMask(mask >> b)
1365       const SDValue &And = N->getOperand(0);
1366       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1367       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1368 
1369       if (Shift && Mask) {
1370         uint32_t ShiftVal = Shift->getZExtValue();
1371         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1372 
1373         if (isMask_32(MaskVal)) {
1374           uint32_t WidthVal = countPopulation(MaskVal);
1375 
1376           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1377                                   And.getOperand(0), ShiftVal, WidthVal));
1378           return;
1379         }
1380       }
1381     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1382       SelectS_BFEFromShifts(N);
1383       return;
1384     }
1385     break;
1386   case ISD::SRA:
1387     if (N->getOperand(0).getOpcode() == ISD::SHL) {
1388       SelectS_BFEFromShifts(N);
1389       return;
1390     }
1391     break;
1392 
1393   case ISD::SIGN_EXTEND_INREG: {
1394     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1395     SDValue Src = N->getOperand(0);
1396     if (Src.getOpcode() != ISD::SRL)
1397       break;
1398 
1399     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1400     if (!Amt)
1401       break;
1402 
1403     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1404     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1405                             Amt->getZExtValue(), Width));
1406     return;
1407   }
1408   }
1409 
1410   SelectCode(N);
1411 }
1412 
1413 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1414   assert(N->getOpcode() == ISD::BRCOND);
1415   if (!N->hasOneUse())
1416     return false;
1417 
1418   SDValue Cond = N->getOperand(1);
1419   if (Cond.getOpcode() == ISD::CopyToReg)
1420     Cond = Cond.getOperand(2);
1421 
1422   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1423     return false;
1424 
1425   MVT VT = Cond.getOperand(0).getSimpleValueType();
1426   if (VT == MVT::i32)
1427     return true;
1428 
1429   if (VT == MVT::i64) {
1430     auto ST = static_cast<const SISubtarget *>(Subtarget);
1431 
1432     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1433     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1434   }
1435 
1436   return false;
1437 }
1438 
1439 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1440   SDValue Cond = N->getOperand(1);
1441 
1442   if (isCBranchSCC(N)) {
1443     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1444     SelectCode(N);
1445     return;
1446   }
1447 
1448   SDLoc SL(N);
1449 
1450   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
1451   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1452                        N->getOperand(2), // Basic Block
1453                        VCC.getValue(0));
1454   return;
1455 }
1456 
1457 // This is here because there isn't a way to use the generated sub0_sub1 as the
1458 // subreg index to EXTRACT_SUBREG in tablegen.
1459 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1460   MemSDNode *Mem = cast<MemSDNode>(N);
1461   unsigned AS = Mem->getAddressSpace();
1462   if (AS == AMDGPUAS::FLAT_ADDRESS) {
1463     SelectCode(N);
1464     return;
1465   }
1466 
1467   MVT VT = N->getSimpleValueType(0);
1468   bool Is32 = (VT == MVT::i32);
1469   SDLoc SL(N);
1470 
1471   MachineSDNode *CmpSwap = nullptr;
1472   if (Subtarget->hasAddr64()) {
1473     SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
1474 
1475     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1476       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
1477         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
1478       SDValue CmpVal = Mem->getOperand(2);
1479 
1480       // XXX - Do we care about glue operands?
1481 
1482       SDValue Ops[] = {
1483         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1484       };
1485 
1486       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1487     }
1488   }
1489 
1490   if (!CmpSwap) {
1491     SDValue SRsrc, SOffset, Offset, SLC;
1492     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1493       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
1494         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
1495 
1496       SDValue CmpVal = Mem->getOperand(2);
1497       SDValue Ops[] = {
1498         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1499       };
1500 
1501       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1502     }
1503   }
1504 
1505   if (!CmpSwap) {
1506     SelectCode(N);
1507     return;
1508   }
1509 
1510   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1511   *MMOs = Mem->getMemOperand();
1512   CmpSwap->setMemRefs(MMOs, MMOs + 1);
1513 
1514   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1515   SDValue Extract
1516     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1517 
1518   ReplaceUses(SDValue(N, 0), Extract);
1519   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1520   CurDAG->RemoveDeadNode(N);
1521 }
1522 
1523 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1524                                         SDValue &SrcMods) const {
1525 
1526   unsigned Mods = 0;
1527 
1528   Src = In;
1529 
1530   if (Src.getOpcode() == ISD::FNEG) {
1531     Mods |= SISrcMods::NEG;
1532     Src = Src.getOperand(0);
1533   }
1534 
1535   if (Src.getOpcode() == ISD::FABS) {
1536     Mods |= SISrcMods::ABS;
1537     Src = Src.getOperand(0);
1538   }
1539 
1540   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1541 
1542   return true;
1543 }
1544 
1545 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1546                                          SDValue &SrcMods) const {
1547   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1548   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1549 }
1550 
1551 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1552                                          SDValue &SrcMods, SDValue &Clamp,
1553                                          SDValue &Omod) const {
1554   SDLoc DL(In);
1555   // FIXME: Handle Clamp and Omod
1556   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1557   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1558 
1559   return SelectVOP3Mods(In, Src, SrcMods);
1560 }
1561 
1562 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1563                                            SDValue &SrcMods, SDValue &Clamp,
1564                                            SDValue &Omod) const {
1565   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1566 
1567   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1568                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1569                 cast<ConstantSDNode>(Omod)->isNullValue();
1570 }
1571 
1572 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1573                                               SDValue &SrcMods,
1574                                               SDValue &Omod) const {
1575   // FIXME: Handle Omod
1576   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1577 
1578   return SelectVOP3Mods(In, Src, SrcMods);
1579 }
1580 
1581 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1582                                                    SDValue &SrcMods,
1583                                                    SDValue &Clamp,
1584                                                    SDValue &Omod) const {
1585   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1586   return SelectVOP3Mods(In, Src, SrcMods);
1587 }
1588 
1589 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1590   const AMDGPUTargetLowering& Lowering =
1591     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1592   bool IsModified = false;
1593   do {
1594     IsModified = false;
1595     // Go over all selected nodes and try to fold them a bit more
1596     for (SDNode &Node : CurDAG->allnodes()) {
1597       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1598       if (!MachineNode)
1599         continue;
1600 
1601       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1602       if (ResNode != &Node) {
1603         ReplaceUses(&Node, ResNode);
1604         IsModified = true;
1605       }
1606     }
1607     CurDAG->RemoveDeadNodes();
1608   } while (IsModified);
1609 }
1610