1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 /// AMDGPU specific code to select AMDGPU machine instructions for
42 /// SelectionDAG operations.
43 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
44   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
45   // make the right decision when generating code for different targets.
46   const AMDGPUSubtarget *Subtarget;
47 
48 public:
49   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
50       : SelectionDAGISel(TM, OptLevel) {}
51 
52   virtual ~AMDGPUDAGToDAGISel();
53   bool runOnMachineFunction(MachineFunction &MF) override;
54   void Select(SDNode *N) override;
55   StringRef getPassName() const override;
56   void PostprocessISelDAG() override;
57 
58 private:
59   SDValue foldFrameIndex(SDValue N) const;
60   bool isInlineImmediate(const SDNode *N) const;
61   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
62                    const R600InstrInfo *TII);
63   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
64   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
65 
66   bool isConstantLoad(const MemSDNode *N, int cbID) const;
67   bool isUniformBr(const SDNode *N) const;
68 
69   SDNode *glueCopyToM0(SDNode *N) const;
70 
71   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
72   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
73   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
74                                        SDValue& Offset);
75   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
76   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
77   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
78                        unsigned OffsetBits) const;
79   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
80   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
81                                  SDValue &Offset1) const;
82   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
83                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
84                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
85                    SDValue &TFE) const;
86   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
87                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
88                          SDValue &SLC, SDValue &TFE) const;
89   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
90                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
91                          SDValue &SLC) const;
92   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
93                           SDValue &SOffset, SDValue &ImmOffset) const;
94   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
95                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
96                          SDValue &TFE) const;
97   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
98                          SDValue &Offset, SDValue &SLC) const;
99   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
100                          SDValue &Offset) const;
101   bool SelectMUBUFConstant(SDValue Constant,
102                            SDValue &SOffset,
103                            SDValue &ImmOffset) const;
104   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
105                                   SDValue &ImmOffset) const;
106   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
107                                    SDValue &ImmOffset, SDValue &VOffset) const;
108 
109   bool SelectFlat(SDValue Addr, SDValue &VAddr,
110                   SDValue &SLC, SDValue &TFE) const;
111 
112   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
113                         bool &Imm) const;
114   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
115                   bool &Imm) const;
116   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
117   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
118   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
119   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
120   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
121   bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
122   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
123   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
124   bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
125   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
126                        SDValue &Clamp, SDValue &Omod) const;
127   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
128                          SDValue &Clamp, SDValue &Omod) const;
129 
130   bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
131                             SDValue &Omod) const;
132   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
133                                  SDValue &Clamp,
134                                  SDValue &Omod) const;
135 
136   void SelectADD_SUB_I64(SDNode *N);
137   void SelectDIV_SCALE(SDNode *N);
138 
139   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
140                    uint32_t Offset, uint32_t Width);
141   void SelectS_BFEFromShifts(SDNode *N);
142   void SelectS_BFE(SDNode *N);
143   bool isCBranchSCC(const SDNode *N) const;
144   void SelectBRCOND(SDNode *N);
145   void SelectATOMIC_CMP_SWAP(SDNode *N);
146 
147   // Include the pieces autogenerated from the target description.
148 #include "AMDGPUGenDAGISel.inc"
149 };
150 }  // end anonymous namespace
151 
/// \brief This pass converts a legalized DAG into a AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
158 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before handing control to the
  // generic SelectionDAG selector, which drives Select() below.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
163 
// Out-of-line definition of the virtual destructor declared in the class.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
166 
167 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
168   const SIInstrInfo *TII
169     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
170 
171   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
172     return TII->isInlineConstant(C->getAPIntValue());
173 
174   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
175     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
176 
177   return false;
178 }
179 
180 /// \brief Determine the register class for \p OpNo
181 /// \returns The register class of the virtual register that will be used for
182 /// the given operand number \OpNo or NULL if the register class cannot be
183 /// determined.
184 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
185                                                           unsigned OpNo) const {
186   if (!N->isMachineOpcode())
187     return nullptr;
188 
189   switch (N->getMachineOpcode()) {
190   default: {
191     const MCInstrDesc &Desc =
192         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
193     unsigned OpIdx = Desc.getNumDefs() + OpNo;
194     if (OpIdx >= Desc.getNumOperands())
195       return nullptr;
196     int RegClass = Desc.OpInfo[OpIdx].RegClass;
197     if (RegClass == -1)
198       return nullptr;
199 
200     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
201   }
202   case AMDGPU::REG_SEQUENCE: {
203     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
204     const TargetRegisterClass *SuperRC =
205         Subtarget->getRegisterInfo()->getRegClass(RCID);
206 
207     SDValue SubRegOp = N->getOperand(OpNo + 1);
208     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
209     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
210                                                               SubRegIdx);
211   }
212   }
213 }
214 
215 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
216   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
217       cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
218     return N;
219 
220   const SITargetLowering& Lowering =
221       *static_cast<const SITargetLowering*>(getTargetLowering());
222 
223   // Write max value to m0 before each load operation
224 
225   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
226                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
227 
228   SDValue Glue = M0.getValue(1);
229 
230   SmallVector <SDValue, 8> Ops;
231   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
232      Ops.push_back(N->getOperand(i));
233   }
234   Ops.push_back(Glue);
235   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
236 
237   return N;
238 }
239 
240 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
241   switch (NumVectorElts) {
242   case 1:
243     return AMDGPU::SReg_32RegClassID;
244   case 2:
245     return AMDGPU::SReg_64RegClassID;
246   case 4:
247     return AMDGPU::SReg_128RegClassID;
248   case 8:
249     return AMDGPU::SReg_256RegClassID;
250   case 16:
251     return AMDGPU::SReg_512RegClassID;
252   }
253 
254   llvm_unreachable("invalid vector size");
255 }
256 
// Top-level custom selection hook. Nodes needing special handling are
// selected here; everything else falls through to the tablegen-generated
// matcher via SelectCode() at the bottom.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics touching local memory need m0 initialized first; glueCopyToM0
  // is a no-op for other address spaces / pre-SI targets.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    // Only 32-bit element vectors are expected here.
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A 1-element vector is just a copy of the scalar into the right class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      // Pair each element value with its channel's subregister index.
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into one SGPR pair/quad via REG_SEQUENCE.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize a 64-bit constant that is not an inline immediate as two
    // S_MOV_B32 halves recombined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores may need m0 set up; then use the normal
    // tablegen patterns.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be expressible as a scalar bitfield extract (see
    // SelectS_BFE), but only for 32-bit values on SI+.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
484 
485 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
486   if (!N->readMem())
487     return false;
488   if (CbId == -1)
489     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
490 
491   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
492 }
493 
494 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
495   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
496   const Instruction *Term = BB->getTerminator();
497   return Term->getMetadata("amdgpu.uniform") ||
498          Term->getMetadata("structurizecfg.uniform");
499 }
500 
// Human-readable pass name reported by the pass infrastructure.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
504 
505 //===----------------------------------------------------------------------===//
506 // Complex Patterns
507 //===----------------------------------------------------------------------===//
508 
509 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
510                                                          SDValue& IntPtr) {
511   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
512     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
513                                        true);
514     return true;
515   }
516   return false;
517 }
518 
519 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
520     SDValue& BaseReg, SDValue &Offset) {
521   if (!isa<ConstantSDNode>(Addr)) {
522     BaseReg = Addr;
523     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
524     return true;
525   }
526   return false;
527 }
528 
529 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
530                                            SDValue &Offset) {
531   ConstantSDNode *IMMOffset;
532 
533   if (Addr.getOpcode() == ISD::ADD
534       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
535       && isInt<16>(IMMOffset->getZExtValue())) {
536 
537       Base = Addr.getOperand(0);
538       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
539                                          MVT::i32);
540       return true;
541   // If the pointer address is constant, we can move it to the offset field.
542   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
543              && isInt<16>(IMMOffset->getZExtValue())) {
544     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
545                                   SDLoc(CurDAG->getEntryNode()),
546                                   AMDGPU::ZERO, MVT::i32);
547     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
548                                        MVT::i32);
549     return true;
550   }
551 
552   // Default case, no offset
553   Base = Addr;
554   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
555   return true;
556 }
557 
558 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
559                                             SDValue &Offset) {
560   ConstantSDNode *C;
561   SDLoc DL(Addr);
562 
563   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
564     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
565     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
566   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
567             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
568     Base = Addr.getOperand(0);
569     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
570   } else {
571     Base = Addr;
572     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
573   }
574 
575   return true;
576 }
577 
// Select a 64-bit add/sub as two chained 32-bit scalar ops: the low half
// uses S_ADD_U32/S_SUB_U32 (or the carry-consuming form for ADDE/SUBE) and
// the high half always uses the carry-in form, glued to the low half's
// carry-out. The halves are recombined with a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; ADDC/SUBC (and ADDE/SUBE) also
  // produce one as result #1.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Extract the 32-bit low/high halves of both operands.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // Each half yields (i32 result, glue); the glue carries the carry bit.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Operand 2 of ADDE/SUBE is the incoming carry.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  // Recombine the two halves into the 64-bit result.
  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}
642 
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  // Only f32 and f64 forms of V_DIV_SCALE exist.
  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Each SelectVOP3Mods* call fills a (value, modifiers) pair; indices follow
  // the modifiers-first operand layout listed above.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
663 
664 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
665                                          unsigned OffsetBits) const {
666   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
667       (OffsetBits == 8 && !isUInt<8>(Offset)))
668     return false;
669 
670   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
671       Subtarget->unsafeDSOffsetFoldingEnabled())
672     return true;
673 
674   // On Southern Islands instruction with a negative base value and an offset
675   // don't seem to work.
676   return CurDAG->SignBitIsZero(Base);
677 }
678 
// Match a DS (local memory) address as a base register plus a 16-bit
// unsigned byte offset. Always succeeds; defaults to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the negation as a real machine instruction for the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
739 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a 64-bit DS access as a base plus two 8-bit dword offsets (the byte
// offset divided by 4, and that value plus one) as used by the paired
// read2/write2 forms. Always succeeds; defaults to (Addr, 0, 1).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the negation as a real machine instruction for the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // A constant address must be 4-byte aligned to fold into dword offsets.
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
813 
814 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
815   return isUInt<12>(Imm->getZExtValue());
816 }
817 
// Match a generic MUBUF address and populate the full set of MUBUF address
// operands. All modifier outputs default to 0; GLC/SLC are only defaulted if
// the caller has not already bound them. The caller inspects the returned
// Offen/Idxen/Addr64 flags to choose a concrete encoding. Returns false when
// the subtarget prefers FLAT instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Leave caller-provided GLC/SLC untouched; default the rest to 0.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // Constant fits in the 12-bit immediate offset field.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
893 
894 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
895                                            SDValue &VAddr, SDValue &SOffset,
896                                            SDValue &Offset, SDValue &GLC,
897                                            SDValue &SLC, SDValue &TFE) const {
898   SDValue Ptr, Offen, Idxen, Addr64;
899 
900   // addr64 bit was removed for volcanic islands.
901   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
902     return false;
903 
904   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
905               GLC, SLC, TFE))
906     return false;
907 
908   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
909   if (C->getSExtValue()) {
910     SDLoc DL(Addr);
911 
912     const SITargetLowering& Lowering =
913       *static_cast<const SITargetLowering*>(getTargetLowering());
914 
915     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
916     return true;
917   }
918 
919   return false;
920 }
921 
922 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
923                                            SDValue &VAddr, SDValue &SOffset,
924                                            SDValue &Offset,
925                                            SDValue &SLC) const {
926   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
927   SDValue GLC, TFE;
928 
929   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
930 }
931 
932 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
933   if (auto FI = dyn_cast<FrameIndexSDNode>(N))
934     return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
935   return N;
936 }
937 
938 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
939                                             SDValue &VAddr, SDValue &SOffset,
940                                             SDValue &ImmOffset) const {
941 
942   SDLoc DL(Addr);
943   MachineFunction &MF = CurDAG->getMachineFunction();
944   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
945 
946   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
947   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
948 
949   // (add n0, c1)
950   if (CurDAG->isBaseWithConstantOffset(Addr)) {
951     SDValue N0 = Addr.getOperand(0);
952     SDValue N1 = Addr.getOperand(1);
953 
954     // Offsets in vaddr must be positive.
955     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
956     if (isLegalMUBUFImmOffset(C1)) {
957       VAddr = foldFrameIndex(N0);
958       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
959       return true;
960     }
961   }
962 
963   // (node)
964   VAddr = foldFrameIndex(Addr);
965   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
966   return true;
967 }
968 
// Match a MUBUF address in the offset-only form (no offen/idxen/addr64),
// building a full resource descriptor around the base pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  // Only usable when SelectMUBUF picked the plain offset form, i.e. none of
  // the offen/idxen/addr64 address modes were selected.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Build the rsrc word: default data format plus an all-ones size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
996 
997 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
998                                            SDValue &Soffset, SDValue &Offset
999                                            ) const {
1000   SDValue GLC, SLC, TFE;
1001 
1002   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1003 }
1004 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1005                                            SDValue &Soffset, SDValue &Offset,
1006                                            SDValue &SLC) const {
1007   SDValue GLC, TFE;
1008 
1009   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1010 }
1011 
// Split a constant buffer offset into a 12-bit immediate offset (ImmOffset,
// <= 4095) plus an SOffset carrying any overflow, so that
// SOffset + ImmOffset equals the original constant.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  // Values <= 4095 fit entirely in the immediate field (Overflow stays 0).
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      // Overflow has its low 12 bits all set; Overflow + Imm == original.
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Overflow values up to 64 are inline constants; larger ones need an
  // s_mov_b32 to materialize.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1055 
1056 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1057                                                     SDValue &SOffset,
1058                                                     SDValue &ImmOffset) const {
1059   SDLoc DL(Offset);
1060 
1061   if (!isa<ConstantSDNode>(Offset))
1062     return false;
1063 
1064   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1065 }
1066 
// Match a buffer-intrinsic offset using a VGPR offset (VOffset), optionally
// folding a constant addend into SOffset/ImmOffset.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  // (add n0, c1): put the variable part in voffset and split the constant
  // into soffset/immoffset if it is representable.
  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Default: entire offset in voffset, zero soffset/immoffset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1100 
1101 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1102                                     SDValue &VAddr,
1103                                     SDValue &SLC,
1104                                     SDValue &TFE) const {
1105   VAddr = Addr;
1106   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1107   return true;
1108 }
1109 
1110 ///
1111 /// \param EncodedOffset This is the immediate value that will be encoded
1112 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1113 ///        will be in units of dwords and on VI+ it will be units of bytes.
1114 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1115                                  int64_t EncodedOffset) {
1116   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1117      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1118 }
1119 
1120 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1121                                           SDValue &Offset, bool &Imm) const {
1122 
1123   // FIXME: Handle non-constant offsets.
1124   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1125   if (!C)
1126     return false;
1127 
1128   SDLoc SL(ByteOffsetNode);
1129   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1130   int64_t ByteOffset = C->getSExtValue();
1131   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1132       ByteOffset >> 2 : ByteOffset;
1133 
1134   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1135     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1136     Imm = true;
1137     return true;
1138   }
1139 
1140   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1141     return false;
1142 
1143   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1144     // 32-bit Immediates are supported on Sea Islands.
1145     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1146   } else {
1147     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1148     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1149                                             C32Bit), 0);
1150   }
1151   Imm = false;
1152   return true;
1153 }
1154 
1155 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1156                                      SDValue &Offset, bool &Imm) const {
1157 
1158   SDLoc SL(Addr);
1159   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1160     SDValue N0 = Addr.getOperand(0);
1161     SDValue N1 = Addr.getOperand(1);
1162 
1163     if (SelectSMRDOffset(N1, Offset, Imm)) {
1164       SBase = N0;
1165       return true;
1166     }
1167   }
1168   SBase = Addr;
1169   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1170   Imm = true;
1171   return true;
1172 }
1173 
1174 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1175                                        SDValue &Offset) const {
1176   bool Imm;
1177   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1178 }
1179 
1180 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1181                                          SDValue &Offset) const {
1182 
1183   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1184     return false;
1185 
1186   bool Imm;
1187   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1188     return false;
1189 
1190   return !Imm && isa<ConstantSDNode>(Offset);
1191 }
1192 
1193 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1194                                         SDValue &Offset) const {
1195   bool Imm;
1196   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1197          !isa<ConstantSDNode>(Offset);
1198 }
1199 
1200 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1201                                              SDValue &Offset) const {
1202   bool Imm;
1203   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1204 }
1205 
1206 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1207                                                SDValue &Offset) const {
1208   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1209     return false;
1210 
1211   bool Imm;
1212   if (!SelectSMRDOffset(Addr, Offset, Imm))
1213     return false;
1214 
1215   return !Imm && isa<ConstantSDNode>(Offset);
1216 }
1217 
1218 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1219                                               SDValue &Offset) const {
1220   bool Imm;
1221   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1222          !isa<ConstantSDNode>(Offset);
1223 }
1224 
1225 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1226                                             SDValue &Base,
1227                                             SDValue &Offset) const {
1228   SDLoc DL(Index);
1229 
1230   if (CurDAG->isBaseWithConstantOffset(Index)) {
1231     SDValue N0 = Index.getOperand(0);
1232     SDValue N1 = Index.getOperand(1);
1233     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1234 
1235     // (add n0, c0)
1236     Base = N0;
1237     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1238     return true;
1239   }
1240 
1241   if (isa<ConstantSDNode>(Index))
1242     return false;
1243 
1244   Base = Index;
1245   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1246   return true;
1247 }
1248 
1249 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1250                                      SDValue Val, uint32_t Offset,
1251                                      uint32_t Width) {
1252   // Transformation function, pack the offset and width of a BFE into
1253   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1254   // source, bits [5:0] contain the offset and bits [22:16] the width.
1255   uint32_t PackedVal = Offset | (Width << 16);
1256   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1257 
1258   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1259 }
1260 
1261 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1262   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1263   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1264   // Predicate: 0 < b <= c < 32
1265 
1266   const SDValue &Shl = N->getOperand(0);
1267   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1268   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1269 
1270   if (B && C) {
1271     uint32_t BVal = B->getZExtValue();
1272     uint32_t CVal = C->getZExtValue();
1273 
1274     if (0 < BVal && BVal <= CVal && CVal < 32) {
1275       bool Signed = N->getOpcode() == ISD::SRA;
1276       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1277 
1278       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1279                               32 - CVal));
1280       return;
1281     }
1282   }
1283   SelectCode(N);
1284 }
1285 
// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG nodes as a single S_BFE
// bit-field extract; fall through to the generated matcher otherwise.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // A contiguous low mask: its popcount is the field width.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask right first; only the bits surviving the shift
        // contribute to the extracted field.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl a, b) srl c -> handled by the shared shift-pair matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (shl a, b) sra c -> signed variant of the shift-pair matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sign-extended-from type gives the field width directly.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched; use the generated matcher.
  SelectCode(N);
}
1361 
1362 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1363   assert(N->getOpcode() == ISD::BRCOND);
1364   if (!N->hasOneUse())
1365     return false;
1366 
1367   SDValue Cond = N->getOperand(1);
1368   if (Cond.getOpcode() == ISD::CopyToReg)
1369     Cond = Cond.getOperand(2);
1370 
1371   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1372     return false;
1373 
1374   MVT VT = Cond.getOperand(0).getSimpleValueType();
1375   if (VT == MVT::i32)
1376     return true;
1377 
1378   if (VT == MVT::i64) {
1379     auto ST = static_cast<const SISubtarget *>(Subtarget);
1380 
1381     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1382     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1383   }
1384 
1385   return false;
1386 }
1387 
// Select a BRCOND node: simple scalar conditions go through the generated
// matcher (S_CBRANCH_SCC*); everything else is lowered to a VCC-based
// S_CBRANCH_VCCNZ with the condition masked by EXEC.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR.  This means that the value '1' is always
  // written to the corresponding bit for results that are masked.  In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  // Copy the masked condition into VCC so the branch can test vccz/vccnz.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0),  // Chain
                       VCC.getValue(1)); // Glue
  return;
}
1419 
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
//
// Selects an atomic compare-and-swap to a buffer atomic cmpswap instruction,
// trying the addr64 addressing form first and falling back to the
// offset-only form, then to the generated matcher.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // FLAT address space has its own patterns; defer to tablegen.
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // First attempt: addr64 form (only on subtargets that still have it).
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Second attempt: offset-only form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither MUBUF form matched; defer to the generated matcher.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand so the machine instruction keeps its
  // aliasing/volatility information.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the whole data register pair; extract the
  // low subregister(s) holding the original value.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1485 
1486 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1487                                         SDValue &SrcMods) const {
1488 
1489   unsigned Mods = 0;
1490 
1491   Src = In;
1492 
1493   if (Src.getOpcode() == ISD::FNEG) {
1494     Mods |= SISrcMods::NEG;
1495     Src = Src.getOperand(0);
1496   }
1497 
1498   if (Src.getOpcode() == ISD::FABS) {
1499     Mods |= SISrcMods::ABS;
1500     Src = Src.getOperand(0);
1501   }
1502 
1503   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1504 
1505   return true;
1506 }
1507 
1508 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1509                                          SDValue &SrcMods) const {
1510   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1511   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1512 }
1513 
1514 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1515                                          SDValue &SrcMods, SDValue &Clamp,
1516                                          SDValue &Omod) const {
1517   SDLoc DL(In);
1518   // FIXME: Handle Clamp and Omod
1519   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1520   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1521 
1522   return SelectVOP3Mods(In, Src, SrcMods);
1523 }
1524 
1525 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1526                                            SDValue &SrcMods, SDValue &Clamp,
1527                                            SDValue &Omod) const {
1528   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1529 
1530   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1531                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1532                 cast<ConstantSDNode>(Omod)->isNullValue();
1533 }
1534 
1535 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1536                                               SDValue &SrcMods,
1537                                               SDValue &Omod) const {
1538   // FIXME: Handle Omod
1539   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1540 
1541   return SelectVOP3Mods(In, Src, SrcMods);
1542 }
1543 
1544 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1545                                                    SDValue &SrcMods,
1546                                                    SDValue &Clamp,
1547                                                    SDValue &Omod) const {
1548   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1549   return SelectVOP3Mods(In, Src, SrcMods);
1550 }
1551 
// Repeatedly run target-specific folds over all selected machine nodes until
// no further change is made (fixed point).
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      // PostISelFolding returns the original node when nothing was folded;
      // a different node means the fold succeeded and uses must be updated.
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Replacements may strand nodes; prune them before the next pass.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1573