1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 static bool isCBranchSCC(const SDNode *N) {
42   assert(N->getOpcode() == ISD::BRCOND);
43   if (!N->hasOneUse())
44     return false;
45 
46   SDValue Cond = N->getOperand(1);
47   if (Cond.getOpcode() == ISD::CopyToReg)
48     Cond = Cond.getOperand(2);
49   return Cond.getOpcode() == ISD::SETCC &&
50          Cond.getOperand(0).getValueType() == MVT::i32 &&
51 	 Cond.hasOneUse();
52 }
53 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Set in runOnMachineFunction for each function being selected.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True if \p N can be encoded as an inline immediate (no literal needed).
  bool isInlineImmediate(SDNode *N) const;
  // R600 operand-folding helpers (used by the R600 path only).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for load/store predicates.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // True if the current block's terminator carries uniform-branch metadata.
  bool isUniformBr(const SDNode *N) const;

  // Glues an m0 initialization onto local-memory accesses (SI+ only).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local data share) addressing-mode selectors.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode selectors.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;
  // SMRD (scalar memory read) addressing-mode selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  // VOP3 source-modifier (neg/abs/clamp/omod) selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom node selectors used by Select().
  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  void SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
176 }  // end anonymous namespace
177 
178 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
179 // DAG, ready for instruction scheduling.
180 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
181   return new AMDGPUDAGToDAGISel(TM);
182 }
183 
184 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
185     : SelectionDAGISel(TM) {}
186 
187 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
188   Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
189   return SelectionDAGISel::runOnMachineFunction(MF);
190 }
191 
192 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
193 }
194 
195 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
196   const SITargetLowering *TL
197       = static_cast<const SITargetLowering *>(getTargetLowering());
198   return TL->analyzeImmediate(N) == 0;
199 }
200 
201 /// \brief Determine the register class for \p OpNo
202 /// \returns The register class of the virtual register that will be used for
203 /// the given operand number \OpNo or NULL if the register class cannot be
204 /// determined.
205 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
206                                                           unsigned OpNo) const {
207   if (!N->isMachineOpcode())
208     return nullptr;
209 
210   switch (N->getMachineOpcode()) {
211   default: {
212     const MCInstrDesc &Desc =
213         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
214     unsigned OpIdx = Desc.getNumDefs() + OpNo;
215     if (OpIdx >= Desc.getNumOperands())
216       return nullptr;
217     int RegClass = Desc.OpInfo[OpIdx].RegClass;
218     if (RegClass == -1)
219       return nullptr;
220 
221     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
222   }
223   case AMDGPU::REG_SEQUENCE: {
224     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
225     const TargetRegisterClass *SuperRC =
226         Subtarget->getRegisterInfo()->getRegClass(RCID);
227 
228     SDValue SubRegOp = N->getOperand(OpNo + 1);
229     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
230     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
231                                                               SubRegIdx);
232   }
233   }
234 }
235 
236 bool AMDGPUDAGToDAGISel::SelectADDRParam(
237   SDValue Addr, SDValue& R1, SDValue& R2) {
238 
239   if (Addr.getOpcode() == ISD::FrameIndex) {
240     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
241       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
242       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
243     } else {
244       R1 = Addr;
245       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
246     }
247   } else if (Addr.getOpcode() == ISD::ADD) {
248     R1 = Addr.getOperand(0);
249     R2 = Addr.getOperand(1);
250   } else {
251     R1 = Addr;
252     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
253   }
254   return true;
255 }
256 
257 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
258   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
259       Addr.getOpcode() == ISD::TargetGlobalAddress) {
260     return false;
261   }
262   return SelectADDRParam(Addr, R1, R2);
263 }
264 
265 
266 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
267   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
268       Addr.getOpcode() == ISD::TargetGlobalAddress) {
269     return false;
270   }
271 
272   if (Addr.getOpcode() == ISD::FrameIndex) {
273     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
274       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
275       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
276     } else {
277       R1 = Addr;
278       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
279     }
280   } else if (Addr.getOpcode() == ISD::ADD) {
281     R1 = Addr.getOperand(0);
282     R2 = Addr.getOperand(1);
283   } else {
284     R1 = Addr;
285     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
286   }
287   return true;
288 }
289 
// If \p N is a local (LDS) memory access on SI+ targets, glue a copy of -1
// into m0 onto the node so m0 is initialized before the access; otherwise
// return \p N unchanged. The node is morphed in place.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  // Pre-SI targets do not use m0 here, and only local-address accesses need
  // the initialization.
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  // Second result of copyToM0 is the glue value used for ordering.
  SDValue Glue = M0.getValue(1);

  // Rebuild the operand list with the glue appended, then morph the node in
  // place so all existing uses keep pointing at it.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
315 
316 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
317   switch (NumVectorElts) {
318   case 1:
319     return AMDGPU::SReg_32RegClassID;
320   case 2:
321     return AMDGPU::SReg_64RegClassID;
322   case 4:
323     return AMDGPU::SReg_128RegClassID;
324   case 8:
325     return AMDGPU::SReg_256RegClassID;
326   case 16:
327     return AMDGPU::SReg_512RegClassID;
328   }
329 
330   llvm_unreachable("invalid vector size");
331 }
332 
// Main selection entry point: custom-select the nodes handled below and fall
// through to the tablegen-generated SelectCode for everything else.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics on local memory need m0 initialized; glueCopyToM0 is a no-op for
  // other address spaces and pre-SI targets.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction to a REG_SEQUENCE in the appropriate
    // register class (SGPR classes on SI+, R600 classes otherwise).
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just its element copied into the right class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine a pair of 32/64-bit halves into one SGPR pair/quad via
    // REG_SEQUENCE (SI+ only).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize a 64-bit constant that is not an inline immediate as two
    // S_MOV_B32s joined by a REG_SEQUENCE (SI+ only).
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Loads/stores to local memory need m0 glued (no-op otherwise); the
    // actual selection still happens via the tablegen patterns below.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    // Multi-result instruction; cannot be expressed as a tablegen pattern.
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be selectable as scalar bitfield extracts (see SelectS_BFE).
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  }

  SelectCode(N);
}
552 
553 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
554   assert(AS != 0 && "Use checkPrivateAddress instead.");
555   if (!Ptr)
556     return false;
557 
558   return Ptr->getType()->getPointerAddressSpace() == AS;
559 }
560 
561 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
562   if (Op->getPseudoValue())
563     return true;
564 
565   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
566     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
567 
568   return false;
569 }
570 
571 bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
572   if (!N->writeMem())
573     return false;
574   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
575 }
576 
577 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
578   const Value *MemVal = N->getMemOperand()->getValue();
579   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
580           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
581           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
582 }
583 
584 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
585   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
586 }
587 
588 bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
589   if (!N->writeMem())
590     return false;
591   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
592 }
593 
594 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
595   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
596 }
597 
598 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
599   if (!N->readMem())
600     return false;
601   const Value *MemVal = N->getMemOperand()->getValue();
602   if (CbId == -1)
603     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
604 
605   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
606 }
607 
608 bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
609   if (!N->readMem())
610     return false;
611   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
612     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
613       return !isa<GlobalValue>(
614         GetUnderlyingObject(N->getMemOperand()->getValue(),
615 	CurDAG->getDataLayout()));
616 
617     //TODO: Why do we need this?
618     if (N->getMemoryVT().bitsLT(MVT::i32))
619       return true;
620   }
621 
622   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
623 }
624 
625 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
626   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
627 }
628 
629 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
630   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
631 }
632 
633 bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
634   if (!N->readMem())
635     return false;
636   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
637 }
638 
639 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
640   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
641 }
642 
643 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
644   MachineMemOperand *MMO = N->getMemOperand();
645   if (checkPrivateAddress(N->getMemOperand())) {
646     if (MMO) {
647       const PseudoSourceValue *PSV = MMO->getPseudoValue();
648       if (PSV && PSV->isConstantPool()) {
649         return true;
650       }
651     }
652   }
653   return false;
654 }
655 
656 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
657   if (checkPrivateAddress(N->getMemOperand())) {
658     // Check to make sure we are not a constant pool load or a constant load
659     // that is marked as a private load
660     if (isCPLoad(N) || isConstantLoad(N, -1)) {
661       return false;
662     }
663   }
664 
665   const Value *MemVal = N->getMemOperand()->getValue();
666   return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
667     !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
668     !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
669     !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
670     !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
671     !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
672     !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
673 }
674 
675 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
676   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
677   const Instruction *Term = BB->getTerminator();
678   return Term->getMetadata("amdgpu.uniform") ||
679          Term->getMetadata("structurizecfg.uniform");
680 }
681 
// Pass name shown by the pass manager (e.g. in -debug-pass output).
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
685 
686 //===----------------------------------------------------------------------===//
687 // Complex Patterns
688 //===----------------------------------------------------------------------===//
689 
690 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
691                                                          SDValue& IntPtr) {
692   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
693     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
694                                        true);
695     return true;
696   }
697   return false;
698 }
699 
700 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
701     SDValue& BaseReg, SDValue &Offset) {
702   if (!isa<ConstantSDNode>(Addr)) {
703     BaseReg = Addr;
704     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
705     return true;
706   }
707   return false;
708 }
709 
710 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
711                                            SDValue &Offset) {
712   ConstantSDNode *IMMOffset;
713 
714   if (Addr.getOpcode() == ISD::ADD
715       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
716       && isInt<16>(IMMOffset->getZExtValue())) {
717 
718       Base = Addr.getOperand(0);
719       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
720                                          MVT::i32);
721       return true;
722   // If the pointer address is constant, we can move it to the offset field.
723   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
724              && isInt<16>(IMMOffset->getZExtValue())) {
725     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
726                                   SDLoc(CurDAG->getEntryNode()),
727                                   AMDGPU::ZERO, MVT::i32);
728     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
729                                        MVT::i32);
730     return true;
731   }
732 
733   // Default case, no offset
734   Base = Addr;
735   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
736   return true;
737 }
738 
739 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
740                                             SDValue &Offset) {
741   ConstantSDNode *C;
742   SDLoc DL(Addr);
743 
744   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
745     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
746     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
747   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
748             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
749     Base = Addr.getOperand(0);
750     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
751   } else {
752     Base = Addr;
753     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
754   }
755 
756   return true;
757 }
758 
// Select a 64-bit integer ADD/SUB as a 32-bit add/sub of the low halves
// followed by a carry-consuming add/sub of the high halves, then recombine
// the two 32-bit results with a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both 64-bit operands into their 32-bit sub0/sub1 halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low op produces (i32 result, glue); the glue value orders the
  // carry-consuming high op directly after it.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  // Reassemble the two halves into a 64-bit SGPR pair.
  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
801 
802 // We need to handle this here because tablegen doesn't support matching
803 // instructions with multiple outputs.
804 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
805   SDLoc SL(N);
806   EVT VT = N->getValueType(0);
807 
808   assert(VT == MVT::f32 || VT == MVT::f64);
809 
810   unsigned Opc
811     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
812 
813   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
814   // omod
815   SDValue Ops[8];
816 
817   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
818   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
819   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
820   CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
821 }
822 
823 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
824                                          unsigned OffsetBits) const {
825   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
826       (OffsetBits == 8 && !isUInt<8>(Offset)))
827     return false;
828 
829   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
830       Subtarget->unsafeDSOffsetFoldingEnabled())
831     return true;
832 
833   // On Southern Islands instruction with a negative base value and an offset
834   // don't seem to work.
835   return CurDAG->SignBitIsZero(Base);
836 }
837 
// Select a DS address operand as a base register plus a 16-bit byte offset.
// Always returns true; when nothing can be folded, the whole address becomes
// the base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Materialize (0 - x) as the base so the constant C can be the
          // folded immediate offset.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
898 
// TODO: If offset is too big, put low 16-bit into offset.
// Select a 64-bit DS access as a base register plus two 8-bit offsets in
// dword units, with offset1 = offset0 + 1. Always returns true.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Convert the byte offset to dword granularity.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Materialize (0 - x) as the base so C can supply both offsets.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // NOTE(review): asserts the constant address is 4-byte aligned --
    // presumably guaranteed by the callers; confirm.
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      // Use a materialized zero base and fold the whole address into the
      // two offsets.
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
969 
970 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
971   return isUInt<12>(Imm->getZExtValue());
972 }
973 
// Decompose a global-memory address into the full set of MUBUF operands
// (ptr, vaddr, soffset, immediate offset) and flags (offen/idxen/addr64,
// glc/slc/tfe). Returns false only when the subtarget prefers flat
// instructions for global accesses; otherwise some decomposition is chosen.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may be pre-set by the caller; only default them when unset.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Start from the simplest addressing form; the matchers below override
  // these defaults as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // Offset is wider than 32 bits; fall through to the generic cases below.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1047 
1048 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1049                                            SDValue &VAddr, SDValue &SOffset,
1050                                            SDValue &Offset, SDValue &GLC,
1051                                            SDValue &SLC, SDValue &TFE) const {
1052   SDValue Ptr, Offen, Idxen, Addr64;
1053 
1054   // addr64 bit was removed for volcanic islands.
1055   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1056     return false;
1057 
1058   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1059               GLC, SLC, TFE))
1060     return false;
1061 
1062   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1063   if (C->getSExtValue()) {
1064     SDLoc DL(Addr);
1065 
1066     const SITargetLowering& Lowering =
1067       *static_cast<const SITargetLowering*>(getTargetLowering());
1068 
1069     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1070     return true;
1071   }
1072 
1073   return false;
1074 }
1075 
1076 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1077                                            SDValue &VAddr, SDValue &SOffset,
1078                                            SDValue &Offset,
1079                                            SDValue &SLC) const {
1080   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1081   SDValue GLC, TFE;
1082 
1083   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1084 }
1085 
1086 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1087                                             SDValue &VAddr, SDValue &SOffset,
1088                                             SDValue &ImmOffset) const {
1089 
1090   SDLoc DL(Addr);
1091   MachineFunction &MF = CurDAG->getMachineFunction();
1092   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1093 
1094   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1095   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1096 
1097   // (add n0, c1)
1098   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1099     SDValue N0 = Addr.getOperand(0);
1100     SDValue N1 = Addr.getOperand(1);
1101 
1102     // Offsets in vaddr must be positive.
1103     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1104     if (isLegalMUBUFImmOffset(C1)) {
1105       VAddr = N0;
1106       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1107       return true;
1108     }
1109   }
1110 
1111   // (node)
1112   VAddr = Addr;
1113   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1114   return true;
1115 }
1116 
1117 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1118                                            SDValue &SOffset, SDValue &Offset,
1119                                            SDValue &GLC, SDValue &SLC,
1120                                            SDValue &TFE) const {
1121   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1122   const SIInstrInfo *TII =
1123     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1124 
1125   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1126               GLC, SLC, TFE))
1127     return false;
1128 
1129   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1130       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1131       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1132     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1133                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1134     SDLoc DL(Addr);
1135 
1136     const SITargetLowering& Lowering =
1137       *static_cast<const SITargetLowering*>(getTargetLowering());
1138 
1139     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1140     return true;
1141   }
1142   return false;
1143 }
1144 
1145 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1146                                            SDValue &Soffset, SDValue &Offset
1147                                            ) const {
1148   SDValue GLC, SLC, TFE;
1149 
1150   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1151 }
1152 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1153                                            SDValue &Soffset, SDValue &Offset,
1154                                            SDValue &GLC) const {
1155   SDValue SLC, TFE;
1156 
1157   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1158 }
1159 
1160 void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1161                                              SDValue &SOffset,
1162                                              SDValue &ImmOffset) const {
1163   SDLoc DL(Constant);
1164   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1165   uint32_t Overflow = 0;
1166 
1167   if (Imm >= 4096) {
1168     if (Imm <= 4095 + 64) {
1169       // Use an SOffset inline constant for 1..64
1170       Overflow = Imm - 4095;
1171       Imm = 4095;
1172     } else {
1173       // Try to keep the same value in SOffset for adjacent loads, so that
1174       // the corresponding register contents can be re-used.
1175       //
1176       // Load values with all low-bits set into SOffset, so that a larger
1177       // range of values can be covered using s_movk_i32
1178       uint32_t High = (Imm + 1) & ~4095;
1179       uint32_t Low = (Imm + 1) & 4095;
1180       Imm = Low;
1181       Overflow = High - 1;
1182     }
1183   }
1184 
1185   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1186 
1187   if (Overflow <= 64)
1188     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1189   else
1190     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1191                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1192                       0);
1193 }
1194 
1195 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1196                                                     SDValue &SOffset,
1197                                                     SDValue &ImmOffset) const {
1198   SDLoc DL(Offset);
1199 
1200   if (!isa<ConstantSDNode>(Offset))
1201     return false;
1202 
1203   SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1204 
1205   return true;
1206 }
1207 
1208 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1209                                                      SDValue &SOffset,
1210                                                      SDValue &ImmOffset,
1211                                                      SDValue &VOffset) const {
1212   SDLoc DL(Offset);
1213 
1214   // Don't generate an unnecessary voffset for constant offsets.
1215   if (isa<ConstantSDNode>(Offset))
1216     return false;
1217 
1218   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1219     SDValue N0 = Offset.getOperand(0);
1220     SDValue N1 = Offset.getOperand(1);
1221     SelectMUBUFConstant(N1, SOffset, ImmOffset);
1222     VOffset = N0;
1223   } else {
1224     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1225     ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1226     VOffset = Offset;
1227   }
1228 
1229   return true;
1230 }
1231 
1232 ///
1233 /// \param EncodedOffset This is the immediate value that will be encoded
1234 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1235 ///        will be in units of dwords and on VI+ it will be units of bytes.
1236 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1237                                  int64_t EncodedOffset) {
1238   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1239      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1240 }
1241 
1242 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1243                                           SDValue &Offset, bool &Imm) const {
1244 
1245   // FIXME: Handle non-constant offsets.
1246   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1247   if (!C)
1248     return false;
1249 
1250   SDLoc SL(ByteOffsetNode);
1251   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1252   int64_t ByteOffset = C->getSExtValue();
1253   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1254       ByteOffset >> 2 : ByteOffset;
1255 
1256   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1257     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1258     Imm = true;
1259     return true;
1260   }
1261 
1262   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1263     return false;
1264 
1265   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1266     // 32-bit Immediates are supported on Sea Islands.
1267     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1268   } else {
1269     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1270     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1271                                             C32Bit), 0);
1272   }
1273   Imm = false;
1274   return true;
1275 }
1276 
1277 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1278                                      SDValue &Offset, bool &Imm) const {
1279 
1280   SDLoc SL(Addr);
1281   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1282     SDValue N0 = Addr.getOperand(0);
1283     SDValue N1 = Addr.getOperand(1);
1284 
1285     if (SelectSMRDOffset(N1, Offset, Imm)) {
1286       SBase = N0;
1287       return true;
1288     }
1289   }
1290   SBase = Addr;
1291   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1292   Imm = true;
1293   return true;
1294 }
1295 
1296 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1297                                        SDValue &Offset) const {
1298   bool Imm;
1299   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1300 }
1301 
1302 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1303                                          SDValue &Offset) const {
1304 
1305   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1306     return false;
1307 
1308   bool Imm;
1309   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1310     return false;
1311 
1312   return !Imm && isa<ConstantSDNode>(Offset);
1313 }
1314 
1315 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1316                                         SDValue &Offset) const {
1317   bool Imm;
1318   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1319          !isa<ConstantSDNode>(Offset);
1320 }
1321 
1322 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1323                                              SDValue &Offset) const {
1324   bool Imm;
1325   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1326 }
1327 
1328 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1329                                                SDValue &Offset) const {
1330   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1331     return false;
1332 
1333   bool Imm;
1334   if (!SelectSMRDOffset(Addr, Offset, Imm))
1335     return false;
1336 
1337   return !Imm && isa<ConstantSDNode>(Offset);
1338 }
1339 
1340 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1341                                               SDValue &Offset) const {
1342   bool Imm;
1343   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1344          !isa<ConstantSDNode>(Offset);
1345 }
1346 
1347 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1348                                      uint32_t Offset, uint32_t Width) {
1349   // Transformation function, pack the offset and width of a BFE into
1350   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1351   // source, bits [5:0] contain the offset and bits [22:16] the width.
1352   uint32_t PackedVal = Offset | (Width << 16);
1353   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1354 
1355   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1356 }
1357 
1358 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1359   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1360   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1361   // Predicate: 0 < b <= c < 32
1362 
1363   const SDValue &Shl = N->getOperand(0);
1364   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1365   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1366 
1367   if (B && C) {
1368     uint32_t BVal = B->getZExtValue();
1369     uint32_t CVal = C->getZExtValue();
1370 
1371     if (0 < BVal && BVal <= CVal && CVal < 32) {
1372       bool Signed = N->getOpcode() == ISD::SRA;
1373       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1374 
1375       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1376                               32 - CVal));
1377       return;
1378     }
1379   }
1380   SelectCode(N);
1381 }
1382 
// Try to select AND / SRL / SRA / SIGN_EXTEND_INREG patterns as a single
// S_BFE_{I,U}32 bitfield extract; falls back to the generated matcher when
// no pattern applies.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // A contiguous low mask: its popcount is the extracted field width.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sext_inreg VT supplies the field width; the shift amount supplies
    // the field offset.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched; use the default tablegen selection.
  SelectCode(N);
}
1458 
1459 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1460   SDValue Cond = N->getOperand(1);
1461 
1462   if (isCBranchSCC(N)) {
1463     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1464     SelectCode(N);
1465     return;
1466   }
1467 
1468   // The result of VOPC instructions is or'd against ~EXEC before it is
1469   // written to vcc or another SGPR.  This means that the value '1' is always
1470   // written to the corresponding bit for results that are masked.  In order
1471   // to correctly check against vccz, we need to and VCC with the EXEC
1472   // register in order to clear the value from the masked bits.
1473 
1474   SDLoc SL(N);
1475 
1476   SDNode *MaskedCond =
1477         CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1478                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1479                                Cond);
1480   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
1481                                      SDValue(MaskedCond, 0),
1482                                      SDValue()); // Passing SDValue() adds a
1483                                                  // glue output.
1484   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1485                        N->getOperand(2), // Basic Block
1486                        VCC.getValue(0),  // Chain
1487                        VCC.getValue(1)); // Glue
1488   return;
1489 }
1490 
1491 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1492                                         SDValue &SrcMods) const {
1493 
1494   unsigned Mods = 0;
1495 
1496   Src = In;
1497 
1498   if (Src.getOpcode() == ISD::FNEG) {
1499     Mods |= SISrcMods::NEG;
1500     Src = Src.getOperand(0);
1501   }
1502 
1503   if (Src.getOpcode() == ISD::FABS) {
1504     Mods |= SISrcMods::ABS;
1505     Src = Src.getOperand(0);
1506   }
1507 
1508   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1509 
1510   return true;
1511 }
1512 
1513 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1514                                          SDValue &SrcMods) const {
1515   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1516   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1517 }
1518 
1519 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1520                                          SDValue &SrcMods, SDValue &Clamp,
1521                                          SDValue &Omod) const {
1522   SDLoc DL(In);
1523   // FIXME: Handle Clamp and Omod
1524   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1525   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1526 
1527   return SelectVOP3Mods(In, Src, SrcMods);
1528 }
1529 
1530 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1531                                            SDValue &SrcMods, SDValue &Clamp,
1532                                            SDValue &Omod) const {
1533   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1534 
1535   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1536                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1537                 cast<ConstantSDNode>(Omod)->isNullValue();
1538 }
1539 
1540 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1541                                               SDValue &SrcMods,
1542                                               SDValue &Omod) const {
1543   // FIXME: Handle Omod
1544   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1545 
1546   return SelectVOP3Mods(In, Src, SrcMods);
1547 }
1548 
1549 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1550                                                    SDValue &SrcMods,
1551                                                    SDValue &Clamp,
1552                                                    SDValue &Omod) const {
1553   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1554   return SelectVOP3Mods(In, Src, SrcMods);
1555 }
1556 
// Pre-isel DAG rewrite: ensure memory nodes don't carry frame-index operands
// in non-pointer positions by wrapping such uses in a v_mov_b32.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      // NOTE(review): this assignment to the outer 'It' appears dead -- the
      // loop below declares its own 'It' that shadows it; confirm intent.
      It = EffectiveFI->use_begin();
    }

    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      // Advance the use iterator before mutating the user node below.
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        // The pointer operand may legitimately be a frame index; only other
        // operand positions get rewritten.
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        // NOTE(review): the assert rejects OpN == 8 even though NewOps has
        // eight slots; '<=' may have been intended -- confirm.
        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          // Replace the frame-index operand with a v_mov_b32 of it.
          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
      }
    }
  }
}
1612 
1613 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1614   const AMDGPUTargetLowering& Lowering =
1615     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1616   bool IsModified = false;
1617   do {
1618     IsModified = false;
1619     // Go over all selected nodes and try to fold them a bit more
1620     for (SDNode &Node : CurDAG->allnodes()) {
1621       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1622       if (!MachineNode)
1623         continue;
1624 
1625       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1626       if (ResNode != &Node) {
1627         ReplaceUses(&Node, ResNode);
1628         IsModified = true;
1629       }
1630     }
1631     CurDAG->RemoveDeadNodes();
1632   } while (IsModified);
1633 }
1634