1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
// Forward declaration: R600InstrInfo is used below only as a pointer type in
// the selector's FoldOperand / FoldOperands / FoldDotOperands declarations.
namespace llvm {
class R600InstrInfo;
}
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 /// AMDGPU specific code to select AMDGPU machine instructions for
42 /// SelectionDAG operations.
43 class AMDGPUDAGToDAGISel : public SelectionDAGISel {
44   // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
45   // make the right decision when generating code for different targets.
46   const AMDGPUSubtarget *Subtarget;
47 
48 public:
49   explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
50       : SelectionDAGISel(TM, OptLevel) {}
51 
52   virtual ~AMDGPUDAGToDAGISel();
53   bool runOnMachineFunction(MachineFunction &MF) override;
54   void Select(SDNode *N) override;
55   StringRef getPassName() const override;
56   void PostprocessISelDAG() override;
57 
58 private:
59   SDValue foldFrameIndex(SDValue N) const;
60   bool isInlineImmediate(const SDNode *N) const;
61   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
62                    const R600InstrInfo *TII);
63   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
64   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
65 
66   bool isConstantLoad(const MemSDNode *N, int cbID) const;
67   bool isUniformBr(const SDNode *N) const;
68 
69   SDNode *glueCopyToM0(SDNode *N) const;
70 
71   const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
72   bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
73   bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
74                                        SDValue& Offset);
75   bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
76   bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
77   bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
78                        unsigned OffsetBits) const;
79   bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
80   bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
81                                  SDValue &Offset1) const;
82   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
83                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
84                    SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
85                    SDValue &TFE) const;
86   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
87                          SDValue &SOffset, SDValue &Offset, SDValue &GLC,
88                          SDValue &SLC, SDValue &TFE) const;
89   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
90                          SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
91                          SDValue &SLC) const;
92   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
93                           SDValue &SOffset, SDValue &ImmOffset) const;
94   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
95                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
96                          SDValue &TFE) const;
97   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
98                          SDValue &Offset, SDValue &SLC) const;
99   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
100                          SDValue &Offset) const;
101   bool SelectMUBUFConstant(SDValue Constant,
102                            SDValue &SOffset,
103                            SDValue &ImmOffset) const;
104   bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
105                                   SDValue &ImmOffset) const;
106   bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
107                                    SDValue &ImmOffset, SDValue &VOffset) const;
108 
109   bool SelectFlat(SDValue Addr, SDValue &VAddr,
110                   SDValue &SLC, SDValue &TFE) const;
111 
112   bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
113                         bool &Imm) const;
114   bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
115                   bool &Imm) const;
116   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
117   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
118   bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
119   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
120   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
121   bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
122   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
123   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
124   bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
125   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
126                        SDValue &Clamp, SDValue &Omod) const;
127   bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
128                          SDValue &Clamp, SDValue &Omod) const;
129 
130   bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
131                             SDValue &Omod) const;
132   bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
133                                  SDValue &Clamp,
134                                  SDValue &Omod) const;
135 
136   void SelectADD_SUB_I64(SDNode *N);
137   void SelectDIV_SCALE(SDNode *N);
138 
139   SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
140                    uint32_t Offset, uint32_t Width);
141   void SelectS_BFEFromShifts(SDNode *N);
142   void SelectS_BFE(SDNode *N);
143   bool isCBranchSCC(const SDNode *N) const;
144   void SelectBRCOND(SDNode *N);
145   void SelectATOMIC_CMP_SWAP(SDNode *N);
146 
147   // Include the pieces autogenerated from the target description.
148 #include "AMDGPUGenDAGISel.inc"
149 };
150 }  // end anonymous namespace
151 
152 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
153 // DAG, ready for instruction scheduling.
154 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
155                                         CodeGenOpt::Level OptLevel) {
156   return new AMDGPUDAGToDAGISel(TM, OptLevel);
157 }
158 
159 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
160   Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
161   return SelectionDAGISel::runOnMachineFunction(MF);
162 }
163 
164 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
165 }
166 
167 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
168   const SIInstrInfo *TII
169     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
170 
171   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
172     return TII->isInlineConstant(C->getAPIntValue());
173 
174   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
175     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
176 
177   return false;
178 }
179 
180 /// \brief Determine the register class for \p OpNo
181 /// \returns The register class of the virtual register that will be used for
182 /// the given operand number \OpNo or NULL if the register class cannot be
183 /// determined.
184 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
185                                                           unsigned OpNo) const {
186   if (!N->isMachineOpcode()) {
187     if (N->getOpcode() == ISD::CopyToReg) {
188       unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
189       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
190         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
191         return MRI.getRegClass(Reg);
192       }
193 
194       const SIRegisterInfo *TRI
195         = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
196       return TRI->getPhysRegClass(Reg);
197     }
198 
199     return nullptr;
200   }
201 
202   switch (N->getMachineOpcode()) {
203   default: {
204     const MCInstrDesc &Desc =
205         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
206     unsigned OpIdx = Desc.getNumDefs() + OpNo;
207     if (OpIdx >= Desc.getNumOperands())
208       return nullptr;
209     int RegClass = Desc.OpInfo[OpIdx].RegClass;
210     if (RegClass == -1)
211       return nullptr;
212 
213     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
214   }
215   case AMDGPU::REG_SEQUENCE: {
216     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
217     const TargetRegisterClass *SuperRC =
218         Subtarget->getRegisterInfo()->getRegClass(RCID);
219 
220     SDValue SubRegOp = N->getOperand(OpNo + 1);
221     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
222     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
223                                                               SubRegIdx);
224   }
225   }
226 }
227 
228 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
229   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
230       cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
231     return N;
232 
233   const SITargetLowering& Lowering =
234       *static_cast<const SITargetLowering*>(getTargetLowering());
235 
236   // Write max value to m0 before each load operation
237 
238   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
239                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
240 
241   SDValue Glue = M0.getValue(1);
242 
243   SmallVector <SDValue, 8> Ops;
244   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
245      Ops.push_back(N->getOperand(i));
246   }
247   Ops.push_back(Glue);
248   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
249 
250   return N;
251 }
252 
253 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
254   switch (NumVectorElts) {
255   case 1:
256     return AMDGPU::SReg_32RegClassID;
257   case 2:
258     return AMDGPU::SReg_64RegClassID;
259   case 4:
260     return AMDGPU::SReg_128RegClassID;
261   case 8:
262     return AMDGPU::SReg_256RegClassID;
263   case 16:
264     return AMDGPU::SReg_512RegClassID;
265   }
266 
267   llvm_unreachable("invalid vector size");
268 }
269 
/// Main selection entry point for a single node.
///
/// Opcodes that need hand-written handling are dealt with in the switch
/// below. A case that finishes the selection itself returns directly; a case
/// that only pre-processes the node (or decides it does not apply) breaks out
/// of the switch so that the node is handed to the generated matcher via
/// SelectCode().
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomic nodes, including the target-specific ATOMIC_INC / ATOMIC_DEC, are
  // passed through glueCopyToM0 before any opcode-specific handling.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    // Only 64-bit results on Southern Islands or newer are handled here.
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Vector construction is turned into a REG_SEQUENCE (or a
    // COPY_TO_REGCLASS for a single element) over the chosen register class.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector only needs its operand copied into the class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    // Fill in one (value, sub-register index) pair per operand. Finding a
    // RegisterSDNode operand abandons the REG_SEQUENCE form.
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      // All missing lanes share a single IMPLICIT_DEF node.
      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    // Not expressible as a REG_SEQUENCE: leave it to SelectCode().
    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    // Generations up to and including Northern Islands go to SelectCode().
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    // Pick the register class and the two sub-register indices by result
    // type; only i128 and i64 are supported.
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Only 64-bit constants that are not inline immediates, on Southern
    // Islands or newer, are expanded here; everything else goes to
    // SelectCode().
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    // Fetch the raw 64-bit pattern of the constant.
    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    // Materialize the low and high 32-bit halves with one S_MOV_B32 each and
    // join them into an SReg_64 with a REG_SEQUENCE.
    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Loads and stores get the same glueCopyToM0 treatment as atomics and are
    // then matched by SelectCode().
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    // Let the SI lowering legalize the node first, then match it normally.
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // 32-bit results on Southern Islands or newer are routed to SelectS_BFE.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Everything that was not fully handled above goes to the generated matcher.
  SelectCode(N);
}
497 
498 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
499   if (!N->readMem())
500     return false;
501   if (CbId == -1)
502     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
503 
504   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
505 }
506 
507 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
508   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
509   const Instruction *Term = BB->getTerminator();
510   return Term->getMetadata("amdgpu.uniform") ||
511          Term->getMetadata("structurizecfg.uniform");
512 }
513 
514 StringRef AMDGPUDAGToDAGISel::getPassName() const {
515   return "AMDGPU DAG->DAG Pattern Instruction Selection";
516 }
517 
518 //===----------------------------------------------------------------------===//
519 // Complex Patterns
520 //===----------------------------------------------------------------------===//
521 
522 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
523                                                          SDValue& IntPtr) {
524   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
525     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
526                                        true);
527     return true;
528   }
529   return false;
530 }
531 
532 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
533     SDValue& BaseReg, SDValue &Offset) {
534   if (!isa<ConstantSDNode>(Addr)) {
535     BaseReg = Addr;
536     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
537     return true;
538   }
539   return false;
540 }
541 
542 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
543                                            SDValue &Offset) {
544   ConstantSDNode *IMMOffset;
545 
546   if (Addr.getOpcode() == ISD::ADD
547       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
548       && isInt<16>(IMMOffset->getZExtValue())) {
549 
550       Base = Addr.getOperand(0);
551       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
552                                          MVT::i32);
553       return true;
554   // If the pointer address is constant, we can move it to the offset field.
555   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
556              && isInt<16>(IMMOffset->getZExtValue())) {
557     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
558                                   SDLoc(CurDAG->getEntryNode()),
559                                   AMDGPU::ZERO, MVT::i32);
560     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
561                                        MVT::i32);
562     return true;
563   }
564 
565   // Default case, no offset
566   Base = Addr;
567   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
568   return true;
569 }
570 
571 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
572                                             SDValue &Offset) {
573   ConstantSDNode *C;
574   SDLoc DL(Addr);
575 
576   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
577     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
578     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
579   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
580             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
581     Base = Addr.getOperand(0);
582     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
583   } else {
584     Base = Addr;
585     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
586   }
587 
588   return true;
589 }
590 
591 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
592   SDLoc DL(N);
593   SDValue LHS = N->getOperand(0);
594   SDValue RHS = N->getOperand(1);
595 
596   unsigned Opcode = N->getOpcode();
597   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
598   bool ProduceCarry =
599       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
600   bool IsAdd =
601       (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);
602 
603   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
604   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
605 
606   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
607                                        DL, MVT::i32, LHS, Sub0);
608   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
609                                        DL, MVT::i32, LHS, Sub1);
610 
611   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
612                                        DL, MVT::i32, RHS, Sub0);
613   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
614                                        DL, MVT::i32, RHS, Sub1);
615 
616   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
617 
618   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
619   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
620 
621   SDNode *AddLo;
622   if (!ConsumeCarry) {
623     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
624     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
625   } else {
626     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
627     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
628   }
629   SDValue AddHiArgs[] = {
630     SDValue(Hi0, 0),
631     SDValue(Hi1, 0),
632     SDValue(AddLo, 1)
633   };
634   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
635 
636   SDValue RegSequenceArgs[] = {
637     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
638     SDValue(AddLo,0),
639     Sub0,
640     SDValue(AddHi,0),
641     Sub1,
642   };
643   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
644                                                MVT::i64, RegSequenceArgs);
645 
646   if (ProduceCarry) {
647     // Replace the carry-use
648     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
649   }
650 
651   // Replace the remaining uses.
652   CurDAG->ReplaceAllUsesWith(N, RegSequence);
653   CurDAG->RemoveDeadNode(N);
654 }
655 
656 // We need to handle this here because tablegen doesn't support matching
657 // instructions with multiple outputs.
658 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
659   SDLoc SL(N);
660   EVT VT = N->getValueType(0);
661 
662   assert(VT == MVT::f32 || VT == MVT::f64);
663 
664   unsigned Opc
665     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
666 
667   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
668   // omod
669   SDValue Ops[8];
670 
671   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
672   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
673   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
674   CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
675 }
676 
677 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
678                                          unsigned OffsetBits) const {
679   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
680       (OffsetBits == 8 && !isUInt<8>(Offset)))
681     return false;
682 
683   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
684       Subtarget->unsafeDSOffsetFoldingEnabled())
685     return true;
686 
687   // On Southern Islands instruction with a negative base value and an offset
688   // don't seem to work.
689   return CurDAG->SignBitIsZero(Base);
690 }
691 
692 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
693                                               SDValue &Offset) const {
694   SDLoc DL(Addr);
695   if (CurDAG->isBaseWithConstantOffset(Addr)) {
696     SDValue N0 = Addr.getOperand(0);
697     SDValue N1 = Addr.getOperand(1);
698     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
699     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
700       // (add n0, c0)
701       Base = N0;
702       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
703       return true;
704     }
705   } else if (Addr.getOpcode() == ISD::SUB) {
706     // sub C, x -> add (sub 0, x), C
707     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
708       int64_t ByteOffset = C->getSExtValue();
709       if (isUInt<16>(ByteOffset)) {
710         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
711 
712         // XXX - This is kind of hacky. Create a dummy sub node so we can check
713         // the known bits in isDSOffsetLegal. We need to emit the selected node
714         // here, so this is thrown away.
715         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
716                                       Zero, Addr.getOperand(1));
717 
718         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
719           MachineSDNode *MachineSub
720             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
721                                      Zero, Addr.getOperand(1));
722 
723           Base = SDValue(MachineSub, 0);
724           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
725           return true;
726         }
727       }
728     }
729   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
730     // If we have a constant address, prefer to put the constant into the
731     // offset. This can save moves to load the constant address since multiple
732     // operations can share the zero base address register, and enables merging
733     // into read2 / write2 instructions.
734 
735     SDLoc DL(Addr);
736 
737     if (isUInt<16>(CAddr->getZExtValue())) {
738       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
739       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
740                                  DL, MVT::i32, Zero);
741       Base = SDValue(MovZero, 0);
742       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
743       return true;
744     }
745   }
746 
747   // default case
748   Base = Addr;
749   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
750   return true;
751 }
752 
753 // TODO: If offset is too big, put low 16-bit into offset.
754 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
755                                                    SDValue &Offset0,
756                                                    SDValue &Offset1) const {
757   SDLoc DL(Addr);
758 
759   if (CurDAG->isBaseWithConstantOffset(Addr)) {
760     SDValue N0 = Addr.getOperand(0);
761     SDValue N1 = Addr.getOperand(1);
762     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
763     unsigned DWordOffset0 = C1->getZExtValue() / 4;
764     unsigned DWordOffset1 = DWordOffset0 + 1;
765     // (add n0, c0)
766     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
767       Base = N0;
768       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
769       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
770       return true;
771     }
772   } else if (Addr.getOpcode() == ISD::SUB) {
773     // sub C, x -> add (sub 0, x), C
774     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
775       unsigned DWordOffset0 = C->getZExtValue() / 4;
776       unsigned DWordOffset1 = DWordOffset0 + 1;
777 
778       if (isUInt<8>(DWordOffset0)) {
779         SDLoc DL(Addr);
780         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
781 
782         // XXX - This is kind of hacky. Create a dummy sub node so we can check
783         // the known bits in isDSOffsetLegal. We need to emit the selected node
784         // here, so this is thrown away.
785         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
786                                       Zero, Addr.getOperand(1));
787 
788         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
789           MachineSDNode *MachineSub
790             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
791                                      Zero, Addr.getOperand(1));
792 
793           Base = SDValue(MachineSub, 0);
794           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
795           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
796           return true;
797         }
798       }
799     }
800   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
801     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
802     unsigned DWordOffset1 = DWordOffset0 + 1;
803     assert(4 * DWordOffset0 == CAddr->getZExtValue());
804 
805     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
806       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
807       MachineSDNode *MovZero
808         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
809                                  DL, MVT::i32, Zero);
810       Base = SDValue(MovZero, 0);
811       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
812       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
813       return true;
814     }
815   }
816 
817   // default case
818 
819   // FIXME: This is broken on SI where we still need to check if the base
820   // pointer is positive here.
821   Base = Addr;
822   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
823   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
824   return true;
825 }
826 
827 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
828   return isUInt<12>(Imm->getZExtValue());
829 }
830 
831 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
832                                      SDValue &VAddr, SDValue &SOffset,
833                                      SDValue &Offset, SDValue &Offen,
834                                      SDValue &Idxen, SDValue &Addr64,
835                                      SDValue &GLC, SDValue &SLC,
836                                      SDValue &TFE) const {
837   // Subtarget prefers to use flat instruction
838   if (Subtarget->useFlatForGlobal())
839     return false;
840 
841   SDLoc DL(Addr);
842 
843   if (!GLC.getNode())
844     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
845   if (!SLC.getNode())
846     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
847   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
848 
849   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
850   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
851   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
852   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
853 
854   if (CurDAG->isBaseWithConstantOffset(Addr)) {
855     SDValue N0 = Addr.getOperand(0);
856     SDValue N1 = Addr.getOperand(1);
857     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
858 
859     if (N0.getOpcode() == ISD::ADD) {
860       // (add (add N2, N3), C1) -> addr64
861       SDValue N2 = N0.getOperand(0);
862       SDValue N3 = N0.getOperand(1);
863       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
864       Ptr = N2;
865       VAddr = N3;
866     } else {
867 
868       // (add N0, C1) -> offset
869       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
870       Ptr = N0;
871     }
872 
873     if (isLegalMUBUFImmOffset(C1)) {
874       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
875       return true;
876     }
877 
878     if (isUInt<32>(C1->getZExtValue())) {
879       // Illegal offset, store it in soffset.
880       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
881       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
882                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
883                         0);
884       return true;
885     }
886   }
887 
888   if (Addr.getOpcode() == ISD::ADD) {
889     // (add N0, N1) -> addr64
890     SDValue N0 = Addr.getOperand(0);
891     SDValue N1 = Addr.getOperand(1);
892     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
893     Ptr = N0;
894     VAddr = N1;
895     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
896     return true;
897   }
898 
899   // default case -> offset
900   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
901   Ptr = Addr;
902   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
903 
904   return true;
905 }
906 
907 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
908                                            SDValue &VAddr, SDValue &SOffset,
909                                            SDValue &Offset, SDValue &GLC,
910                                            SDValue &SLC, SDValue &TFE) const {
911   SDValue Ptr, Offen, Idxen, Addr64;
912 
913   // addr64 bit was removed for volcanic islands.
914   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
915     return false;
916 
917   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
918               GLC, SLC, TFE))
919     return false;
920 
921   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
922   if (C->getSExtValue()) {
923     SDLoc DL(Addr);
924 
925     const SITargetLowering& Lowering =
926       *static_cast<const SITargetLowering*>(getTargetLowering());
927 
928     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
929     return true;
930   }
931 
932   return false;
933 }
934 
935 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
936                                            SDValue &VAddr, SDValue &SOffset,
937                                            SDValue &Offset,
938                                            SDValue &SLC) const {
939   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
940   SDValue GLC, TFE;
941 
942   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
943 }
944 
945 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
946   if (auto FI = dyn_cast<FrameIndexSDNode>(N))
947     return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
948   return N;
949 }
950 
951 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
952                                             SDValue &VAddr, SDValue &SOffset,
953                                             SDValue &ImmOffset) const {
954 
955   SDLoc DL(Addr);
956   MachineFunction &MF = CurDAG->getMachineFunction();
957   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
958 
959   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
960   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
961 
962   // (add n0, c1)
963   if (CurDAG->isBaseWithConstantOffset(Addr)) {
964     SDValue N0 = Addr.getOperand(0);
965     SDValue N1 = Addr.getOperand(1);
966 
967     // Offsets in vaddr must be positive.
968     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
969     if (isLegalMUBUFImmOffset(C1)) {
970       VAddr = foldFrameIndex(N0);
971       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
972       return true;
973     }
974   }
975 
976   // (node)
977   VAddr = foldFrameIndex(Addr);
978   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
979   return true;
980 }
981 
982 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
983                                            SDValue &SOffset, SDValue &Offset,
984                                            SDValue &GLC, SDValue &SLC,
985                                            SDValue &TFE) const {
986   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
987   const SIInstrInfo *TII =
988     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
989 
990   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
991               GLC, SLC, TFE))
992     return false;
993 
994   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
995       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
996       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
997     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
998                     APInt::getAllOnesValue(32).getZExtValue(); // Size
999     SDLoc DL(Addr);
1000 
1001     const SITargetLowering& Lowering =
1002       *static_cast<const SITargetLowering*>(getTargetLowering());
1003 
1004     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1005     return true;
1006   }
1007   return false;
1008 }
1009 
1010 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1011                                            SDValue &Soffset, SDValue &Offset
1012                                            ) const {
1013   SDValue GLC, SLC, TFE;
1014 
1015   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1016 }
1017 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1018                                            SDValue &Soffset, SDValue &Offset,
1019                                            SDValue &SLC) const {
1020   SDValue GLC, TFE;
1021 
1022   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1023 }
1024 
1025 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1026                                              SDValue &SOffset,
1027                                              SDValue &ImmOffset) const {
1028   SDLoc DL(Constant);
1029   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1030   uint32_t Overflow = 0;
1031 
1032   if (Imm >= 4096) {
1033     if (Imm <= 4095 + 64) {
1034       // Use an SOffset inline constant for 1..64
1035       Overflow = Imm - 4095;
1036       Imm = 4095;
1037     } else {
1038       // Try to keep the same value in SOffset for adjacent loads, so that
1039       // the corresponding register contents can be re-used.
1040       //
1041       // Load values with all low-bits set into SOffset, so that a larger
1042       // range of values can be covered using s_movk_i32
1043       uint32_t High = (Imm + 1) & ~4095;
1044       uint32_t Low = (Imm + 1) & 4095;
1045       Imm = Low;
1046       Overflow = High - 1;
1047     }
1048   }
1049 
1050   // There is a hardware bug in SI and CI which prevents address clamping in
1051   // MUBUF instructions from working correctly with SOffsets. The immediate
1052   // offset is unaffected.
1053   if (Overflow > 0 &&
1054       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
1055     return false;
1056 
1057   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1058 
1059   if (Overflow <= 64)
1060     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1061   else
1062     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1063                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1064                       0);
1065 
1066   return true;
1067 }
1068 
1069 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1070                                                     SDValue &SOffset,
1071                                                     SDValue &ImmOffset) const {
1072   SDLoc DL(Offset);
1073 
1074   if (!isa<ConstantSDNode>(Offset))
1075     return false;
1076 
1077   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1078 }
1079 
1080 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1081                                                      SDValue &SOffset,
1082                                                      SDValue &ImmOffset,
1083                                                      SDValue &VOffset) const {
1084   SDLoc DL(Offset);
1085 
1086   // Don't generate an unnecessary voffset for constant offsets.
1087   if (isa<ConstantSDNode>(Offset)) {
1088     SDValue Tmp1, Tmp2;
1089 
1090     // When necessary, use a voffset in <= CI anyway to work around a hardware
1091     // bug.
1092     if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
1093         SelectMUBUFConstant(Offset, Tmp1, Tmp2))
1094       return false;
1095   }
1096 
1097   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1098     SDValue N0 = Offset.getOperand(0);
1099     SDValue N1 = Offset.getOperand(1);
1100     if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
1101         SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
1102       VOffset = N0;
1103       return true;
1104     }
1105   }
1106 
1107   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1108   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1109   VOffset = Offset;
1110 
1111   return true;
1112 }
1113 
1114 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1115                                     SDValue &VAddr,
1116                                     SDValue &SLC,
1117                                     SDValue &TFE) const {
1118   VAddr = Addr;
1119   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1120   return true;
1121 }
1122 
1123 ///
1124 /// \param EncodedOffset This is the immediate value that will be encoded
1125 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1126 ///        will be in units of dwords and on VI+ it will be units of bytes.
1127 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1128                                  int64_t EncodedOffset) {
1129   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1130      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1131 }
1132 
1133 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1134                                           SDValue &Offset, bool &Imm) const {
1135 
1136   // FIXME: Handle non-constant offsets.
1137   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1138   if (!C)
1139     return false;
1140 
1141   SDLoc SL(ByteOffsetNode);
1142   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1143   int64_t ByteOffset = C->getSExtValue();
1144   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1145       ByteOffset >> 2 : ByteOffset;
1146 
1147   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1148     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1149     Imm = true;
1150     return true;
1151   }
1152 
1153   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1154     return false;
1155 
1156   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1157     // 32-bit Immediates are supported on Sea Islands.
1158     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1159   } else {
1160     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1161     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1162                                             C32Bit), 0);
1163   }
1164   Imm = false;
1165   return true;
1166 }
1167 
1168 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1169                                      SDValue &Offset, bool &Imm) const {
1170 
1171   SDLoc SL(Addr);
1172   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1173     SDValue N0 = Addr.getOperand(0);
1174     SDValue N1 = Addr.getOperand(1);
1175 
1176     if (SelectSMRDOffset(N1, Offset, Imm)) {
1177       SBase = N0;
1178       return true;
1179     }
1180   }
1181   SBase = Addr;
1182   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1183   Imm = true;
1184   return true;
1185 }
1186 
1187 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1188                                        SDValue &Offset) const {
1189   bool Imm;
1190   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1191 }
1192 
1193 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1194                                          SDValue &Offset) const {
1195 
1196   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1197     return false;
1198 
1199   bool Imm;
1200   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1201     return false;
1202 
1203   return !Imm && isa<ConstantSDNode>(Offset);
1204 }
1205 
1206 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1207                                         SDValue &Offset) const {
1208   bool Imm;
1209   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1210          !isa<ConstantSDNode>(Offset);
1211 }
1212 
1213 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1214                                              SDValue &Offset) const {
1215   bool Imm;
1216   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1217 }
1218 
1219 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1220                                                SDValue &Offset) const {
1221   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1222     return false;
1223 
1224   bool Imm;
1225   if (!SelectSMRDOffset(Addr, Offset, Imm))
1226     return false;
1227 
1228   return !Imm && isa<ConstantSDNode>(Offset);
1229 }
1230 
1231 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1232                                               SDValue &Offset) const {
1233   bool Imm;
1234   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1235          !isa<ConstantSDNode>(Offset);
1236 }
1237 
1238 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1239                                             SDValue &Base,
1240                                             SDValue &Offset) const {
1241   SDLoc DL(Index);
1242 
1243   if (CurDAG->isBaseWithConstantOffset(Index)) {
1244     SDValue N0 = Index.getOperand(0);
1245     SDValue N1 = Index.getOperand(1);
1246     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1247 
1248     // (add n0, c0)
1249     Base = N0;
1250     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1251     return true;
1252   }
1253 
1254   if (isa<ConstantSDNode>(Index))
1255     return false;
1256 
1257   Base = Index;
1258   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1259   return true;
1260 }
1261 
1262 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1263                                      SDValue Val, uint32_t Offset,
1264                                      uint32_t Width) {
1265   // Transformation function, pack the offset and width of a BFE into
1266   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1267   // source, bits [5:0] contain the offset and bits [22:16] the width.
1268   uint32_t PackedVal = Offset | (Width << 16);
1269   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1270 
1271   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1272 }
1273 
1274 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1275   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1276   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1277   // Predicate: 0 < b <= c < 32
1278 
1279   const SDValue &Shl = N->getOperand(0);
1280   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1281   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1282 
1283   if (B && C) {
1284     uint32_t BVal = B->getZExtValue();
1285     uint32_t CVal = C->getZExtValue();
1286 
1287     if (0 < BVal && BVal <= CVal && CVal < 32) {
1288       bool Signed = N->getOpcode() == ISD::SRA;
1289       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1290 
1291       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1292                               32 - CVal));
1293       return;
1294     }
1295   }
1296   SelectCode(N);
1297 }
1298 
1299 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1300   switch (N->getOpcode()) {
1301   case ISD::AND:
1302     if (N->getOperand(0).getOpcode() == ISD::SRL) {
1303       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1304       // Predicate: isMask(mask)
1305       const SDValue &Srl = N->getOperand(0);
1306       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1307       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1308 
1309       if (Shift && Mask) {
1310         uint32_t ShiftVal = Shift->getZExtValue();
1311         uint32_t MaskVal = Mask->getZExtValue();
1312 
1313         if (isMask_32(MaskVal)) {
1314           uint32_t WidthVal = countPopulation(MaskVal);
1315 
1316           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1317                                   Srl.getOperand(0), ShiftVal, WidthVal));
1318           return;
1319         }
1320       }
1321     }
1322     break;
1323   case ISD::SRL:
1324     if (N->getOperand(0).getOpcode() == ISD::AND) {
1325       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1326       // Predicate: isMask(mask >> b)
1327       const SDValue &And = N->getOperand(0);
1328       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1329       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1330 
1331       if (Shift && Mask) {
1332         uint32_t ShiftVal = Shift->getZExtValue();
1333         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1334 
1335         if (isMask_32(MaskVal)) {
1336           uint32_t WidthVal = countPopulation(MaskVal);
1337 
1338           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1339                                   And.getOperand(0), ShiftVal, WidthVal));
1340           return;
1341         }
1342       }
1343     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1344       SelectS_BFEFromShifts(N);
1345       return;
1346     }
1347     break;
1348   case ISD::SRA:
1349     if (N->getOperand(0).getOpcode() == ISD::SHL) {
1350       SelectS_BFEFromShifts(N);
1351       return;
1352     }
1353     break;
1354 
1355   case ISD::SIGN_EXTEND_INREG: {
1356     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1357     SDValue Src = N->getOperand(0);
1358     if (Src.getOpcode() != ISD::SRL)
1359       break;
1360 
1361     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1362     if (!Amt)
1363       break;
1364 
1365     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1366     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1367                             Amt->getZExtValue(), Width));
1368     return;
1369   }
1370   }
1371 
1372   SelectCode(N);
1373 }
1374 
1375 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1376   assert(N->getOpcode() == ISD::BRCOND);
1377   if (!N->hasOneUse())
1378     return false;
1379 
1380   SDValue Cond = N->getOperand(1);
1381   if (Cond.getOpcode() == ISD::CopyToReg)
1382     Cond = Cond.getOperand(2);
1383 
1384   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1385     return false;
1386 
1387   MVT VT = Cond.getOperand(0).getSimpleValueType();
1388   if (VT == MVT::i32)
1389     return true;
1390 
1391   if (VT == MVT::i64) {
1392     auto ST = static_cast<const SISubtarget *>(Subtarget);
1393 
1394     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1395     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1396   }
1397 
1398   return false;
1399 }
1400 
1401 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1402   SDValue Cond = N->getOperand(1);
1403 
1404   if (isCBranchSCC(N)) {
1405     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1406     SelectCode(N);
1407     return;
1408   }
1409 
1410   SDLoc SL(N);
1411 
1412   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
1413   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1414                        N->getOperand(2), // Basic Block
1415                        VCC.getValue(0));
1416   return;
1417 }
1418 
1419 // This is here because there isn't a way to use the generated sub0_sub1 as the
1420 // subreg index to EXTRACT_SUBREG in tablegen.
1421 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1422   MemSDNode *Mem = cast<MemSDNode>(N);
1423   unsigned AS = Mem->getAddressSpace();
1424   if (AS == AMDGPUAS::FLAT_ADDRESS) {
1425     SelectCode(N);
1426     return;
1427   }
1428 
1429   MVT VT = N->getSimpleValueType(0);
1430   bool Is32 = (VT == MVT::i32);
1431   SDLoc SL(N);
1432 
1433   MachineSDNode *CmpSwap = nullptr;
1434   if (Subtarget->hasAddr64()) {
1435     SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
1436 
1437     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1438       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
1439         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
1440       SDValue CmpVal = Mem->getOperand(2);
1441 
1442       // XXX - Do we care about glue operands?
1443 
1444       SDValue Ops[] = {
1445         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1446       };
1447 
1448       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1449     }
1450   }
1451 
1452   if (!CmpSwap) {
1453     SDValue SRsrc, SOffset, Offset, SLC;
1454     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1455       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
1456         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
1457 
1458       SDValue CmpVal = Mem->getOperand(2);
1459       SDValue Ops[] = {
1460         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1461       };
1462 
1463       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1464     }
1465   }
1466 
1467   if (!CmpSwap) {
1468     SelectCode(N);
1469     return;
1470   }
1471 
1472   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1473   *MMOs = Mem->getMemOperand();
1474   CmpSwap->setMemRefs(MMOs, MMOs + 1);
1475 
1476   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1477   SDValue Extract
1478     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1479 
1480   ReplaceUses(SDValue(N, 0), Extract);
1481   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1482   CurDAG->RemoveDeadNode(N);
1483 }
1484 
1485 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1486                                         SDValue &SrcMods) const {
1487 
1488   unsigned Mods = 0;
1489 
1490   Src = In;
1491 
1492   if (Src.getOpcode() == ISD::FNEG) {
1493     Mods |= SISrcMods::NEG;
1494     Src = Src.getOperand(0);
1495   }
1496 
1497   if (Src.getOpcode() == ISD::FABS) {
1498     Mods |= SISrcMods::ABS;
1499     Src = Src.getOperand(0);
1500   }
1501 
1502   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1503 
1504   return true;
1505 }
1506 
1507 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1508                                          SDValue &SrcMods) const {
1509   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1510   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1511 }
1512 
1513 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1514                                          SDValue &SrcMods, SDValue &Clamp,
1515                                          SDValue &Omod) const {
1516   SDLoc DL(In);
1517   // FIXME: Handle Clamp and Omod
1518   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1519   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1520 
1521   return SelectVOP3Mods(In, Src, SrcMods);
1522 }
1523 
1524 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1525                                            SDValue &SrcMods, SDValue &Clamp,
1526                                            SDValue &Omod) const {
1527   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1528 
1529   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1530                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1531                 cast<ConstantSDNode>(Omod)->isNullValue();
1532 }
1533 
1534 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1535                                               SDValue &SrcMods,
1536                                               SDValue &Omod) const {
1537   // FIXME: Handle Omod
1538   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1539 
1540   return SelectVOP3Mods(In, Src, SrcMods);
1541 }
1542 
1543 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1544                                                    SDValue &SrcMods,
1545                                                    SDValue &Clamp,
1546                                                    SDValue &Omod) const {
1547   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1548   return SelectVOP3Mods(In, Src, SrcMods);
1549 }
1550 
/// Post-selection cleanup: repeatedly offer every machine node in the DAG to
/// the target lowering's PostISelFolding hook until a full pass makes no
/// replacement.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    // Reset per pass; the loop stops after the first pass with no change.
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      // Only already-selected machine nodes are candidates for folding.
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      // A returned node different from the input is treated as a replacement.
      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop nodes left dead by this pass before deciding whether to go again.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
1572