1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/CodeGen/FunctionLoweringInfo.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/PseudoSourceValue.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 
28 using namespace llvm;
29 
30 namespace llvm {
31 class R600InstrInfo;
32 }
33 
34 //===----------------------------------------------------------------------===//
35 // Instruction Selector Implementation
36 //===----------------------------------------------------------------------===//
37 
38 namespace {
39 
40 static bool isCBranchSCC(const SDNode *N) {
41   assert(N->getOpcode() == ISD::BRCOND);
42   if (!N->hasOneUse())
43     return false;
44 
45   SDValue Cond = N->getOperand(1);
46   if (Cond.getOpcode() == ISD::CopyToReg)
47     Cond = Cond.getOperand(2);
48   return Cond.getOpcode() == ISD::SETCC &&
49          Cond.getOperand(0).getValueType() == MVT::i32 &&
50 	 Cond.hasOneUse();
51 }
52 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  // SelectionDAGISel overrides.
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *SelectImpl(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory nodes.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (LDS/GDS) addressing-mode matchers.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode matchers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;
  // SMRD (scalar memory read) addressing-mode matchers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  // VOP3 source-modifier (neg/abs/clamp/omod) matchers.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom selection for nodes tablegen cannot match (multi-result, etc.).
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
175 }  // end anonymous namespace
176 
177 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
178 // DAG, ready for instruction scheduling.
179 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
180   return new AMDGPUDAGToDAGISel(TM);
181 }
182 
183 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
184     : SelectionDAGISel(TM) {}
185 
186 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
187   Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
188   return SelectionDAGISel::runOnMachineFunction(MF);
189 }
190 
191 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
192 }
193 
194 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
195   const SITargetLowering *TL
196       = static_cast<const SITargetLowering *>(getTargetLowering());
197   return TL->analyzeImmediate(N) == 0;
198 }
199 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: look up the operand's class in the
    // instruction description. OpNo counts use operands, so skip the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    // -1 marks operands with no register class constraint.
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operands are: class id, then (value, subreg index) pairs.
    // The value operand's class is the subclass of the sequence's class that
    // supports the paired subregister index.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
234 
235 bool AMDGPUDAGToDAGISel::SelectADDRParam(
236   SDValue Addr, SDValue& R1, SDValue& R2) {
237 
238   if (Addr.getOpcode() == ISD::FrameIndex) {
239     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
240       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
241       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
242     } else {
243       R1 = Addr;
244       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
245     }
246   } else if (Addr.getOpcode() == ISD::ADD) {
247     R1 = Addr.getOperand(0);
248     R2 = Addr.getOperand(1);
249   } else {
250     R1 = Addr;
251     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
252   }
253   return true;
254 }
255 
256 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
257   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
258       Addr.getOpcode() == ISD::TargetGlobalAddress) {
259     return false;
260   }
261   return SelectADDRParam(Addr, R1, R2);
262 }
263 
264 
265 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
266   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
267       Addr.getOpcode() == ISD::TargetGlobalAddress) {
268     return false;
269   }
270 
271   if (Addr.getOpcode() == ISD::FrameIndex) {
272     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
273       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
274       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
275     } else {
276       R1 = Addr;
277       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
278     }
279   } else if (Addr.getOpcode() == ISD::ADD) {
280     R1 = Addr.getOperand(0);
281     R2 = Addr.getOperand(1);
282   } else {
283     R1 = Addr;
284     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
285   }
286   return true;
287 }
288 
// Glue a CopyToReg of M0 in front of the local-address memory node \p N so
// that M0 is initialized before the access. Returns \p N (possibly morphed
// in place). Pre-SI targets and non-local accesses are returned unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value (-1) to m0 before each load/store operation.

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Rebuild N's operand list with the glue result appended, ordering the M0
  // write before N. MorphNodeTo mutates N in place, so no new node is made.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
314 
315 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
316   switch (NumVectorElts) {
317   case 1:
318     return AMDGPU::SReg_32RegClassID;
319   case 2:
320     return AMDGPU::SReg_64RegClassID;
321   case 4:
322     return AMDGPU::SReg_128RegClassID;
323   case 8:
324     return AMDGPU::SReg_256RegClassID;
325   case 16:
326     return AMDGPU::SReg_512RegClassID;
327   }
328 
329   llvm_unreachable("invalid vector size");
330 }
331 
// Main entry point for custom node selection. Handles the cases tablegen
// cannot express (multi-result nodes, REG_SEQUENCE construction, M0 setup)
// and defers everything else to the generated matcher via SelectCode.
SDNode *AMDGPUDAGToDAGISel::SelectImpl(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics (and the target atomic inc/dec nodes) may access LDS and then
  // need an M0 write glued in front of them.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A single-element vector is just a COPY_TO_REGCLASS of the element.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Combine two 32- or 64-bit halves into one wider register pair via
    // REG_SEQUENCE (SI+ only).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit non-inline constants as two S_MOV_B32s combined
    // with a REG_SEQUENCE (SI+ only).
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local loads/stores may need an M0 initialization glued in front before
    // pattern selection (see glueCopyToM0).
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    // Multi-result node; tablegen cannot match it (see SelectDIV_SCALE).
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be foldable into a scalar BFE (see SelectS_BFE), i32 SI+ only.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  // Defer everything else to the tablegen-generated matcher.
  return SelectCode(N);
}
543 
544 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
545   assert(AS != 0 && "Use checkPrivateAddress instead.");
546   if (!Ptr)
547     return false;
548 
549   return Ptr->getType()->getPointerAddressSpace() == AS;
550 }
551 
552 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
553   if (Op->getPseudoValue())
554     return true;
555 
556   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
557     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
558 
559   return false;
560 }
561 
562 bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
563   if (!N->writeMem())
564     return false;
565   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
566 }
567 
568 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
569   const Value *MemVal = N->getMemOperand()->getValue();
570   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
571           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
572           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
573 }
574 
575 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
576   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
577 }
578 
579 bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
580   if (!N->writeMem())
581     return false;
582   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
583 }
584 
585 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
586   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
587 }
588 
589 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
590   if (!N->readMem())
591     return false;
592   const Value *MemVal = N->getMemOperand()->getValue();
593   if (CbId == -1)
594     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
595 
596   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
597 }
598 
599 bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
600   if (!N->readMem())
601     return false;
602   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
603     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
604         N->getMemoryVT().bitsLT(MVT::i32))
605       return true;
606 
607   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
608 }
609 
610 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
611   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
612 }
613 
614 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
615   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
616 }
617 
618 bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
619   if (!N->readMem())
620     return false;
621   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
622 }
623 
624 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
625   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
626 }
627 
628 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
629   MachineMemOperand *MMO = N->getMemOperand();
630   if (checkPrivateAddress(N->getMemOperand())) {
631     if (MMO) {
632       const PseudoSourceValue *PSV = MMO->getPseudoValue();
633       if (PSV && PSV->isConstantPool()) {
634         return true;
635       }
636     }
637   }
638   return false;
639 }
640 
641 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
642   if (checkPrivateAddress(N->getMemOperand())) {
643     // Check to make sure we are not a constant pool load or a constant load
644     // that is marked as a private load
645     if (isCPLoad(N) || isConstantLoad(N, -1)) {
646       return false;
647     }
648   }
649 
650   const Value *MemVal = N->getMemOperand()->getValue();
651   return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
652     !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
653     !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
654     !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
655     !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
656     !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
657     !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
658 }
659 
660 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
661   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
662   const Instruction *Term = BB->getTerminator();
663   return Term->getMetadata("amdgpu.uniform") ||
664          Term->getMetadata("structurizecfg.uniform");
665 }
666 
// Human-readable pass name, shown by -debug-pass and in timing reports.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
670 
671 //===----------------------------------------------------------------------===//
672 // Complex Patterns
673 //===----------------------------------------------------------------------===//
674 
675 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
676                                                          SDValue& IntPtr) {
677   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
678     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
679                                        true);
680     return true;
681   }
682   return false;
683 }
684 
685 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
686     SDValue& BaseReg, SDValue &Offset) {
687   if (!isa<ConstantSDNode>(Addr)) {
688     BaseReg = Addr;
689     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
690     return true;
691   }
692   return false;
693 }
694 
695 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
696                                            SDValue &Offset) {
697   ConstantSDNode *IMMOffset;
698 
699   if (Addr.getOpcode() == ISD::ADD
700       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
701       && isInt<16>(IMMOffset->getZExtValue())) {
702 
703       Base = Addr.getOperand(0);
704       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
705                                          MVT::i32);
706       return true;
707   // If the pointer address is constant, we can move it to the offset field.
708   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
709              && isInt<16>(IMMOffset->getZExtValue())) {
710     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
711                                   SDLoc(CurDAG->getEntryNode()),
712                                   AMDGPU::ZERO, MVT::i32);
713     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
714                                        MVT::i32);
715     return true;
716   }
717 
718   // Default case, no offset
719   Base = Addr;
720   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
721   return true;
722 }
723 
724 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
725                                             SDValue &Offset) {
726   ConstantSDNode *C;
727   SDLoc DL(Addr);
728 
729   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
730     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
731     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
732   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
733             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
734     Base = Addr.getOperand(0);
735     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
736   } else {
737     Base = Addr;
738     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
739   }
740 
741   return true;
742 }
743 
// Lower a 64-bit ADD/SUB into a 32-bit op on the low halves plus a
// carry-consuming op on the high halves, recombined with REG_SEQUENCE.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Extract the 32-bit halves of both operands.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low-half op produces (result, glue); the glue result orders the
  // carry-consuming high-half op right after it.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  // Recombine the two halves into a 64-bit SGPR pair:
  // (class id, lo, sub0, hi, sub1).
  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
786 
787 // We need to handle this here because tablegen doesn't support matching
788 // instructions with multiple outputs.
789 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
790   SDLoc SL(N);
791   EVT VT = N->getValueType(0);
792 
793   assert(VT == MVT::f32 || VT == MVT::f64);
794 
795   unsigned Opc
796     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
797 
798   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
799   // omod
800   SDValue Ops[8];
801 
802   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
803   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
804   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
805   return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
806 }
807 
808 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
809                                          unsigned OffsetBits) const {
810   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
811       (OffsetBits == 8 && !isUInt<8>(Offset)))
812     return false;
813 
814   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
815       Subtarget->unsafeDSOffsetFoldingEnabled())
816     return true;
817 
818   // On Southern Islands instruction with a negative base value and an offset
819   // don't seem to work.
820   return CurDAG->SignBitIsZero(Base);
821 }
822 
823 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
824                                               SDValue &Offset) const {
825   SDLoc DL(Addr);
826   if (CurDAG->isBaseWithConstantOffset(Addr)) {
827     SDValue N0 = Addr.getOperand(0);
828     SDValue N1 = Addr.getOperand(1);
829     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
830     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
831       // (add n0, c0)
832       Base = N0;
833       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
834       return true;
835     }
836   } else if (Addr.getOpcode() == ISD::SUB) {
837     // sub C, x -> add (sub 0, x), C
838     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
839       int64_t ByteOffset = C->getSExtValue();
840       if (isUInt<16>(ByteOffset)) {
841         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
842 
843         // XXX - This is kind of hacky. Create a dummy sub node so we can check
844         // the known bits in isDSOffsetLegal. We need to emit the selected node
845         // here, so this is thrown away.
846         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
847                                       Zero, Addr.getOperand(1));
848 
849         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
850           MachineSDNode *MachineSub
851             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
852                                      Zero, Addr.getOperand(1));
853 
854           Base = SDValue(MachineSub, 0);
855           Offset = Addr.getOperand(0);
856           return true;
857         }
858       }
859     }
860   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
861     // If we have a constant address, prefer to put the constant into the
862     // offset. This can save moves to load the constant address since multiple
863     // operations can share the zero base address register, and enables merging
864     // into read2 / write2 instructions.
865 
866     SDLoc DL(Addr);
867 
868     if (isUInt<16>(CAddr->getZExtValue())) {
869       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
870       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
871                                  DL, MVT::i32, Zero);
872       Base = SDValue(MovZero, 0);
873       Offset = Addr;
874       return true;
875     }
876   }
877 
878   // default case
879   Base = Addr;
880   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
881   return true;
882 }
883 
884 // TODO: If offset is too big, put low 16-bit into offset.
885 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
886                                                    SDValue &Offset0,
887                                                    SDValue &Offset1) const {
888   SDLoc DL(Addr);
889 
890   if (CurDAG->isBaseWithConstantOffset(Addr)) {
891     SDValue N0 = Addr.getOperand(0);
892     SDValue N1 = Addr.getOperand(1);
893     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
894     unsigned DWordOffset0 = C1->getZExtValue() / 4;
895     unsigned DWordOffset1 = DWordOffset0 + 1;
896     // (add n0, c0)
897     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
898       Base = N0;
899       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
900       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
901       return true;
902     }
903   } else if (Addr.getOpcode() == ISD::SUB) {
904     // sub C, x -> add (sub 0, x), C
905     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
906       unsigned DWordOffset0 = C->getZExtValue() / 4;
907       unsigned DWordOffset1 = DWordOffset0 + 1;
908 
909       if (isUInt<8>(DWordOffset0)) {
910         SDLoc DL(Addr);
911         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
912 
913         // XXX - This is kind of hacky. Create a dummy sub node so we can check
914         // the known bits in isDSOffsetLegal. We need to emit the selected node
915         // here, so this is thrown away.
916         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
917                                       Zero, Addr.getOperand(1));
918 
919         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
920           MachineSDNode *MachineSub
921             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
922                                      Zero, Addr.getOperand(1));
923 
924           Base = SDValue(MachineSub, 0);
925           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
926           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
927           return true;
928         }
929       }
930     }
931   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
932     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
933     unsigned DWordOffset1 = DWordOffset0 + 1;
934     assert(4 * DWordOffset0 == CAddr->getZExtValue());
935 
936     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
937       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
938       MachineSDNode *MovZero
939         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
940                                  DL, MVT::i32, Zero);
941       Base = SDValue(MovZero, 0);
942       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
943       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
944       return true;
945     }
946   }
947 
948   // default case
949   Base = Addr;
950   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
951   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
952   return true;
953 }
954 
955 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
956   return isUInt<12>(Imm->getZExtValue());
957 }
958 
// Decompose an address into MUBUF operands (resource pointer, vaddr,
// soffset, immediate offset) plus the addressing-mode flag operands
// (offen/idxen/addr64) and cache bits (glc/slc/tfe). Returns false only
// when the subtarget prefers flat instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC and SLC may have been pre-seeded by the caller; only default the
  // ones that are still unset.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Start from the plain "offset" addressing mode; the matchers below turn
  // addr64 on when a variable address component is found.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // Put the constant part into the immediate offset field if it fits,
    // otherwise materialize it into soffset when it fits in 32 bits. If it
    // fits in neither, fall through to the generic cases below.
    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1032 
1033 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1034                                            SDValue &VAddr, SDValue &SOffset,
1035                                            SDValue &Offset, SDValue &GLC,
1036                                            SDValue &SLC, SDValue &TFE) const {
1037   SDValue Ptr, Offen, Idxen, Addr64;
1038 
1039   // addr64 bit was removed for volcanic islands.
1040   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1041     return false;
1042 
1043   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1044               GLC, SLC, TFE))
1045     return false;
1046 
1047   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1048   if (C->getSExtValue()) {
1049     SDLoc DL(Addr);
1050 
1051     const SITargetLowering& Lowering =
1052       *static_cast<const SITargetLowering*>(getTargetLowering());
1053 
1054     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1055     return true;
1056   }
1057 
1058   return false;
1059 }
1060 
1061 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1062                                            SDValue &VAddr, SDValue &SOffset,
1063                                            SDValue &Offset,
1064                                            SDValue &SLC) const {
1065   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1066   SDValue GLC, TFE;
1067 
1068   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1069 }
1070 
1071 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1072                                             SDValue &VAddr, SDValue &SOffset,
1073                                             SDValue &ImmOffset) const {
1074 
1075   SDLoc DL(Addr);
1076   MachineFunction &MF = CurDAG->getMachineFunction();
1077   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1078 
1079   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1080   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1081 
1082   // (add n0, c1)
1083   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1084     SDValue N0 = Addr.getOperand(0);
1085     SDValue N1 = Addr.getOperand(1);
1086 
1087     // Offsets in vaddr must be positive.
1088     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1089     if (isLegalMUBUFImmOffset(C1)) {
1090       VAddr = N0;
1091       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1092       return true;
1093     }
1094   }
1095 
1096   // (node)
1097   VAddr = Addr;
1098   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1099   return true;
1100 }
1101 
1102 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1103                                            SDValue &SOffset, SDValue &Offset,
1104                                            SDValue &GLC, SDValue &SLC,
1105                                            SDValue &TFE) const {
1106   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1107   const SIInstrInfo *TII =
1108     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1109 
1110   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1111               GLC, SLC, TFE))
1112     return false;
1113 
1114   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1115       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1116       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1117     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1118                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1119     SDLoc DL(Addr);
1120 
1121     const SITargetLowering& Lowering =
1122       *static_cast<const SITargetLowering*>(getTargetLowering());
1123 
1124     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1125     return true;
1126   }
1127   return false;
1128 }
1129 
1130 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1131                                            SDValue &Soffset, SDValue &Offset
1132                                            ) const {
1133   SDValue GLC, SLC, TFE;
1134 
1135   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1136 }
1137 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1138                                            SDValue &Soffset, SDValue &Offset,
1139                                            SDValue &GLC) const {
1140   SDValue SLC, TFE;
1141 
1142   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1143 }
1144 
1145 void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1146                                              SDValue &SOffset,
1147                                              SDValue &ImmOffset) const {
1148   SDLoc DL(Constant);
1149   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1150   uint32_t Overflow = 0;
1151 
1152   if (Imm >= 4096) {
1153     if (Imm <= 4095 + 64) {
1154       // Use an SOffset inline constant for 1..64
1155       Overflow = Imm - 4095;
1156       Imm = 4095;
1157     } else {
1158       // Try to keep the same value in SOffset for adjacent loads, so that
1159       // the corresponding register contents can be re-used.
1160       //
1161       // Load values with all low-bits set into SOffset, so that a larger
1162       // range of values can be covered using s_movk_i32
1163       uint32_t High = (Imm + 1) & ~4095;
1164       uint32_t Low = (Imm + 1) & 4095;
1165       Imm = Low;
1166       Overflow = High - 1;
1167     }
1168   }
1169 
1170   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1171 
1172   if (Overflow <= 64)
1173     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1174   else
1175     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1176                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1177                       0);
1178 }
1179 
1180 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1181                                                     SDValue &SOffset,
1182                                                     SDValue &ImmOffset) const {
1183   SDLoc DL(Offset);
1184 
1185   if (!isa<ConstantSDNode>(Offset))
1186     return false;
1187 
1188   SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1189 
1190   return true;
1191 }
1192 
1193 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1194                                                      SDValue &SOffset,
1195                                                      SDValue &ImmOffset,
1196                                                      SDValue &VOffset) const {
1197   SDLoc DL(Offset);
1198 
1199   // Don't generate an unnecessary voffset for constant offsets.
1200   if (isa<ConstantSDNode>(Offset))
1201     return false;
1202 
1203   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1204     SDValue N0 = Offset.getOperand(0);
1205     SDValue N1 = Offset.getOperand(1);
1206     SelectMUBUFConstant(N1, SOffset, ImmOffset);
1207     VOffset = N0;
1208   } else {
1209     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1210     ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1211     VOffset = Offset;
1212   }
1213 
1214   return true;
1215 }
1216 
1217 ///
1218 /// \param EncodedOffset This is the immediate value that will be encoded
1219 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1220 ///        will be in units of dwords and on VI+ it will be units of bytes.
1221 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1222                                  int64_t EncodedOffset) {
1223   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1224      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1225 }
1226 
1227 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1228                                           SDValue &Offset, bool &Imm) const {
1229 
1230   // FIXME: Handle non-constant offsets.
1231   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1232   if (!C)
1233     return false;
1234 
1235   SDLoc SL(ByteOffsetNode);
1236   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1237   int64_t ByteOffset = C->getSExtValue();
1238   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1239       ByteOffset >> 2 : ByteOffset;
1240 
1241   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1242     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1243     Imm = true;
1244     return true;
1245   }
1246 
1247   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1248     return false;
1249 
1250   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1251     // 32-bit Immediates are supported on Sea Islands.
1252     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1253   } else {
1254     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1255     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1256                                             C32Bit), 0);
1257   }
1258   Imm = false;
1259   return true;
1260 }
1261 
1262 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1263                                      SDValue &Offset, bool &Imm) const {
1264 
1265   SDLoc SL(Addr);
1266   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1267     SDValue N0 = Addr.getOperand(0);
1268     SDValue N1 = Addr.getOperand(1);
1269 
1270     if (SelectSMRDOffset(N1, Offset, Imm)) {
1271       SBase = N0;
1272       return true;
1273     }
1274   }
1275   SBase = Addr;
1276   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1277   Imm = true;
1278   return true;
1279 }
1280 
1281 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1282                                        SDValue &Offset) const {
1283   bool Imm;
1284   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1285 }
1286 
1287 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1288                                          SDValue &Offset) const {
1289 
1290   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1291     return false;
1292 
1293   bool Imm;
1294   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1295     return false;
1296 
1297   return !Imm && isa<ConstantSDNode>(Offset);
1298 }
1299 
1300 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1301                                         SDValue &Offset) const {
1302   bool Imm;
1303   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1304          !isa<ConstantSDNode>(Offset);
1305 }
1306 
1307 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1308                                              SDValue &Offset) const {
1309   bool Imm;
1310   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1311 }
1312 
1313 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1314                                                SDValue &Offset) const {
1315   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1316     return false;
1317 
1318   bool Imm;
1319   if (!SelectSMRDOffset(Addr, Offset, Imm))
1320     return false;
1321 
1322   return !Imm && isa<ConstantSDNode>(Offset);
1323 }
1324 
1325 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1326                                               SDValue &Offset) const {
1327   bool Imm;
1328   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1329          !isa<ConstantSDNode>(Offset);
1330 }
1331 
1332 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1333                                      uint32_t Offset, uint32_t Width) {
1334   // Transformation function, pack the offset and width of a BFE into
1335   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1336   // source, bits [5:0] contain the offset and bits [22:16] the width.
1337   uint32_t PackedVal = Offset | (Width << 16);
1338   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1339 
1340   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1341 }
1342 
1343 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1344   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1345   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1346   // Predicate: 0 < b <= c < 32
1347 
1348   const SDValue &Shl = N->getOperand(0);
1349   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1350   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1351 
1352   if (B && C) {
1353     uint32_t BVal = B->getZExtValue();
1354     uint32_t CVal = C->getZExtValue();
1355 
1356     if (0 < BVal && BVal <= CVal && CVal < 32) {
1357       bool Signed = N->getOpcode() == ISD::SRA;
1358       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1359 
1360       return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1361                       CVal - BVal, 32 - CVal);
1362     }
1363   }
1364   return SelectCode(N);
1365 }
1366 
// Try to fold shift/mask/sign-extend combinations into a single
// S_BFE_{U,I}32 bitfield extract; falls back to normal tablegen selection
// when no pattern applies.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of the extracted field is the number of mask bits.
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask first so only the bits surviving the srl count.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      // shl followed by srl: handled by the shift-pair matcher.
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      // shl followed by sra: signed variant of the shift-pair matcher.
      return SelectS_BFEFromShifts(N);
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    // The shift amount must be constant to fold into the BFE immediate.
    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sign-extended-from type gives the field width.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    return getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                    Amt->getZExtValue(), Width);
  }
  }

  // No fold applied; use the generated matcher.
  return SelectCode(N);
}
1435 
// Select a BRCOND node. Conditions that compare 32-bit values with a
// single use go through the normal SCC-based branch patterns; everything
// else is lowered manually to a VCC-based branch.
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  // BRCOND operands: 0 = chain, 1 = condition, 2 = destination block.
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR.  This means that the value '1' is always
  // written to the corresponding bit for results that are masked.  In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  // Copy the masked condition into VCC so S_CBRANCH_VCCNZ can test it; the
  // glue result keeps the copy adjacent to the branch.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}
1465 
1466 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1467                                         SDValue &SrcMods) const {
1468 
1469   unsigned Mods = 0;
1470 
1471   Src = In;
1472 
1473   if (Src.getOpcode() == ISD::FNEG) {
1474     Mods |= SISrcMods::NEG;
1475     Src = Src.getOperand(0);
1476   }
1477 
1478   if (Src.getOpcode() == ISD::FABS) {
1479     Mods |= SISrcMods::ABS;
1480     Src = Src.getOperand(0);
1481   }
1482 
1483   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1484 
1485   return true;
1486 }
1487 
1488 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1489                                          SDValue &SrcMods) const {
1490   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1491   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1492 }
1493 
1494 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1495                                          SDValue &SrcMods, SDValue &Clamp,
1496                                          SDValue &Omod) const {
1497   SDLoc DL(In);
1498   // FIXME: Handle Clamp and Omod
1499   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1500   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1501 
1502   return SelectVOP3Mods(In, Src, SrcMods);
1503 }
1504 
1505 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1506                                            SDValue &SrcMods, SDValue &Clamp,
1507                                            SDValue &Omod) const {
1508   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1509 
1510   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1511                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1512                 cast<ConstantSDNode>(Omod)->isNullValue();
1513 }
1514 
1515 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1516                                               SDValue &SrcMods,
1517                                               SDValue &Omod) const {
1518   // FIXME: Handle Omod
1519   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1520 
1521   return SelectVOP3Mods(In, Src, SrcMods);
1522 }
1523 
1524 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1525                                                    SDValue &SrcMods,
1526                                                    SDValue &Clamp,
1527                                                    SDValue &Omod) const {
1528   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1529   return SelectVOP3Mods(In, Src, SrcMods);
1530 }
1531 
1532 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
1533   MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();
1534 
1535   // Handle the perverse case where a frame index is being stored. We don't
1536   // want to see multiple frame index operands on the same instruction since
1537   // it complicates things and violates some assumptions about frame index
1538   // lowering.
1539   for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
1540        I != E; ++I) {
1541     SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
1542 
1543     // It's possible that we have a frame index defined in the function that
1544     // isn't used in this block.
1545     if (FI.use_empty())
1546       continue;
1547 
1548     // Skip over the AssertZext inserted during lowering.
1549     SDValue EffectiveFI = FI;
1550     auto It = FI->use_begin();
1551     if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
1552       EffectiveFI = SDValue(*It, 0);
1553       It = EffectiveFI->use_begin();
1554     }
1555 
1556     for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
1557       SDUse &Use = It.getUse();
1558       SDNode *User = Use.getUser();
1559       unsigned OpIdx = It.getOperandNo();
1560       ++It;
1561 
1562       if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
1563         unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
1564         if (OpIdx == PtrIdx)
1565           continue;
1566 
1567         unsigned OpN = M->getNumOperands();
1568         SDValue NewOps[8];
1569 
1570         assert(OpN < array_lengthof(NewOps));
1571         for (unsigned Op = 0; Op != OpN; ++Op) {
1572           if (Op != OpIdx) {
1573             NewOps[Op] = M->getOperand(Op);
1574             continue;
1575           }
1576 
1577           MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1578                                                       SDLoc(M), MVT::i32, FI);
1579           NewOps[Op] = SDValue(Mov, 0);
1580         }
1581 
1582         CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
1583       }
1584     }
1585   }
1586 }
1587 
1588 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1589   const AMDGPUTargetLowering& Lowering =
1590     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1591   bool IsModified = false;
1592   do {
1593     IsModified = false;
1594     // Go over all selected nodes and try to fold them a bit more
1595     for (SDNode &Node : CurDAG->allnodes()) {
1596       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1597       if (!MachineNode)
1598         continue;
1599 
1600       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1601       if (ResNode != &Node) {
1602         ReplaceUses(&Node, ResNode);
1603         IsModified = true;
1604       }
1605     }
1606     CurDAG->RemoveDeadNodes();
1607   } while (IsModified);
1608 }
1609