1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/CodeGen/FunctionLoweringInfo.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/PseudoSourceValue.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 
28 using namespace llvm;
29 
30 namespace llvm {
31 class R600InstrInfo;
32 }
33 
34 //===----------------------------------------------------------------------===//
35 // Instruction Selector Implementation
36 //===----------------------------------------------------------------------===//
37 
38 namespace {
39 
40 static bool isCBranchSCC(const SDNode *N) {
41   assert(N->getOpcode() == ISD::BRCOND);
42   if (!N->hasOneUse())
43     return false;
44 
45   SDValue Cond = N->getOperand(1);
46   if (Cond.getOpcode() == ISD::CopyToReg)
47     Cond = Cond.getOperand(2);
48   return Cond.getOpcode() == ISD::SETCC &&
49          Cond.getOperand(0).getValueType() == MVT::i32 &&
50 	 Cond.hasOneUse();
51 }
52 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Set per-function in runOnMachineFunction.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  // Caches the subtarget, then delegates to SelectionDAGISel.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Main entry point: custom selection for AMDGPU-specific nodes, falling
  // back to the tablegen-generated matcher.
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True if \p N can be encoded as an inline immediate operand.
  bool isInlineImmediate(SDNode *N) const;
  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for load/store predicates below.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // True if the block terminator carries uniform-branch metadata.
  bool isUniformBr(const SDNode *N) const;

  // Glue an M0 initialization ahead of local-memory accesses (SI+).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local memory) addressing-mode selectors.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode selectors.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;
  // SMRD (scalar memory read) addressing-mode selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  // VOP3 source-modifier (neg/abs/clamp/omod) selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Hand-written selection for nodes the generated matcher cannot handle.
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
176 }  // end anonymous namespace
177 
/// \brief This pass converts a legalized DAG into a AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
183 
// The Subtarget pointer is not set here; it is per-function state and is
// cached in runOnMachineFunction.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}
186 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before running the shared
  // SelectionDAG instruction-selection machinery.
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}
191 
192 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
193 }
194 
195 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
196   const SITargetLowering *TL
197       = static_cast<const SITargetLowering *>(getTargetLowering());
198   return TL->analyzeImmediate(N) == 0;
199 }
200 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  // Only machine nodes carry an MCInstrDesc we can query.
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    // OpNo counts use operands only; skip past the defs in the descriptor.
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // For REG_SEQUENCE, operand 0 holds the super-register class ID and each
    // value operand is followed by its subregister index, so derive the class
    // from the super class constrained by that subregister.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
235 
236 bool AMDGPUDAGToDAGISel::SelectADDRParam(
237   SDValue Addr, SDValue& R1, SDValue& R2) {
238 
239   if (Addr.getOpcode() == ISD::FrameIndex) {
240     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
241       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
242       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
243     } else {
244       R1 = Addr;
245       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
246     }
247   } else if (Addr.getOpcode() == ISD::ADD) {
248     R1 = Addr.getOperand(0);
249     R2 = Addr.getOperand(1);
250   } else {
251     R1 = Addr;
252     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
253   }
254   return true;
255 }
256 
257 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
258   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
259       Addr.getOpcode() == ISD::TargetGlobalAddress) {
260     return false;
261   }
262   return SelectADDRParam(Addr, R1, R2);
263 }
264 
265 
266 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
267   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
268       Addr.getOpcode() == ISD::TargetGlobalAddress) {
269     return false;
270   }
271 
272   if (Addr.getOpcode() == ISD::FrameIndex) {
273     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
274       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
275       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
276     } else {
277       R1 = Addr;
278       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
279     }
280   } else if (Addr.getOpcode() == ISD::ADD) {
281     R1 = Addr.getOperand(0);
282     R2 = Addr.getOperand(1);
283   } else {
284     R1 = Addr;
285     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
286   }
287   return true;
288 }
289 
/// If \p N is a local-address memory access on SI or later, glue a copy that
/// initializes M0 to -1 ahead of it and return the morphed node; otherwise
/// return \p N unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  // Pre-SI targets and non-local accesses are left alone.
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Rebuild N's operand list with the glue appended so the M0 write is
  // ordered before the memory operation.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
315 
316 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
317   switch (NumVectorElts) {
318   case 1:
319     return AMDGPU::SReg_32RegClassID;
320   case 2:
321     return AMDGPU::SReg_64RegClassID;
322   case 4:
323     return AMDGPU::SReg_128RegClassID;
324   case 8:
325     return AMDGPU::SReg_256RegClassID;
326   case 16:
327     return AMDGPU::SReg_512RegClassID;
328   }
329 
330   llvm_unreachable("invalid vector size");
331 }
332 
/// Top-level AMDGPU selection hook.  Handles the node kinds that need custom
/// selection and defers everything else to the tablegen-generated SelectCode.
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics (including the AMDGPU inc/dec nodes) may access local memory,
  // so glue an M0 initialization to them (see glueCopyToM0).
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Select vector construction to a REG_SEQUENCE of 32-bit elements.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just a copy into the right register class.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into a single 64/128-bit REG_SEQUENCE.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize non-inline 64-bit immediates as two S_MOV_B32 halves
    // combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Glue the M0 initialization for local-memory accesses, then fall
    // through to the generated matcher for the memory instruction itself.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    // Selected by hand because it has multiple results (see SelectDIV_SCALE).
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // Try to match 32-bit mask/shift combinations to S_BFE (SI+ only).
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  return SelectCode(N);
}
545 
546 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
547   assert(AS != 0 && "Use checkPrivateAddress instead.");
548   if (!Ptr)
549     return false;
550 
551   return Ptr->getType()->getPointerAddressSpace() == AS;
552 }
553 
554 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
555   if (Op->getPseudoValue())
556     return true;
557 
558   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
559     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
560 
561   return false;
562 }
563 
564 bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
565   if (!N->writeMem())
566     return false;
567   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
568 }
569 
570 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
571   const Value *MemVal = N->getMemOperand()->getValue();
572   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
573           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
574           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
575 }
576 
577 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
578   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
579 }
580 
581 bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
582   if (!N->writeMem())
583     return false;
584   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
585 }
586 
587 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
588   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
589 }
590 
591 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
592   if (!N->readMem())
593     return false;
594   const Value *MemVal = N->getMemOperand()->getValue();
595   if (CbId == -1)
596     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
597 
598   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
599 }
600 
/// \returns true if \p N should be selected as a global load.  Loads from
/// the constant address space are also routed here when the target is
/// pre-SI or the access is narrower than 32 bits (presumably because the
/// scalar load path cannot handle those cases — confirm against SMRD
/// selection).
bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
611 
612 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
613   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
614 }
615 
616 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
617   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
618 }
619 
620 bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
621   if (!N->readMem())
622     return false;
623   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
624 }
625 
626 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
627   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
628 }
629 
630 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
631   MachineMemOperand *MMO = N->getMemOperand();
632   if (checkPrivateAddress(N->getMemOperand())) {
633     if (MMO) {
634       const PseudoSourceValue *PSV = MMO->getPseudoValue();
635       if (PSV && PSV->isConstantPool()) {
636         return true;
637       }
638     }
639   }
640   return false;
641 }
642 
643 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
644   if (checkPrivateAddress(N->getMemOperand())) {
645     // Check to make sure we are not a constant pool load or a constant load
646     // that is marked as a private load
647     if (isCPLoad(N) || isConstantLoad(N, -1)) {
648       return false;
649     }
650   }
651 
652   const Value *MemVal = N->getMemOperand()->getValue();
653   return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
654     !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
655     !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
656     !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
657     !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
658     !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
659     !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
660 }
661 
662 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
663   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
664   const Instruction *Term = BB->getTerminator();
665   return Term->getMetadata("amdgpu.uniform") ||
666          Term->getMetadata("structurizecfg.uniform");
667 }
668 
/// \brief Pass name reported to the pass manager.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
672 
673 //===----------------------------------------------------------------------===//
674 // Complex Patterns
675 //===----------------------------------------------------------------------===//
676 
677 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
678                                                          SDValue& IntPtr) {
679   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
680     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
681                                        true);
682     return true;
683   }
684   return false;
685 }
686 
687 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
688     SDValue& BaseReg, SDValue &Offset) {
689   if (!isa<ConstantSDNode>(Addr)) {
690     BaseReg = Addr;
691     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
692     return true;
693   }
694   return false;
695 }
696 
697 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
698                                            SDValue &Offset) {
699   ConstantSDNode *IMMOffset;
700 
701   if (Addr.getOpcode() == ISD::ADD
702       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
703       && isInt<16>(IMMOffset->getZExtValue())) {
704 
705       Base = Addr.getOperand(0);
706       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
707                                          MVT::i32);
708       return true;
709   // If the pointer address is constant, we can move it to the offset field.
710   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
711              && isInt<16>(IMMOffset->getZExtValue())) {
712     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
713                                   SDLoc(CurDAG->getEntryNode()),
714                                   AMDGPU::ZERO, MVT::i32);
715     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
716                                        MVT::i32);
717     return true;
718   }
719 
720   // Default case, no offset
721   Base = Addr;
722   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
723   return true;
724 }
725 
726 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
727                                             SDValue &Offset) {
728   ConstantSDNode *C;
729   SDLoc DL(Addr);
730 
731   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
732     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
733     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
734   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
735             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
736     Base = Addr.getOperand(0);
737     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
738   } else {
739     Base = Addr;
740     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
741   }
742 
743   return true;
744 }
745 
/// Select a 64-bit ADD/SUB as a 32-bit low-half op plus a carry-consuming
/// high-half op, reassembled into a 64-bit value with REG_SEQUENCE.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low op produces (i32 result, glue); the glue feeds the carry into
  // the high op.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  // Pack the two 32-bit results back into a 64-bit register pair.
  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
788 
789 // We need to handle this here because tablegen doesn't support matching
790 // instructions with multiple outputs.
791 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
792   SDLoc SL(N);
793   EVT VT = N->getValueType(0);
794 
795   assert(VT == MVT::f32 || VT == MVT::f64);
796 
797   unsigned Opc
798     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
799 
800   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
801   // omod
802   SDValue Ops[8];
803 
804   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
805   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
806   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
807   return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
808 }
809 
810 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
811                                          unsigned OffsetBits) const {
812   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
813       (OffsetBits == 8 && !isUInt<8>(Offset)))
814     return false;
815 
816   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
817       Subtarget->unsafeDSOffsetFoldingEnabled())
818     return true;
819 
820   // On Southern Islands instruction with a negative base value and an offset
821   // don't seem to work.
822   return CurDAG->SignBitIsZero(Base);
823 }
824 
// Match a DS address as a base register plus a 16-bit unsigned byte offset.
// Always succeeds; if no constant can be folded, the whole address becomes
// the base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Materialize (0 - x) as the base register.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
885 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS address for a 64-bit, 4-byte-aligned access (read2/write2):
// a base register plus two 8-bit dword-unit offsets. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords; the second slot is the next dword.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Materialize (0 - x) as the base register.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: share a zero base register and fold everything into
    // the offsets (see SelectDS1Addr1Offset for the rationale).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
956 
957 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
958   return isUInt<12>(Imm->getZExtValue());
959 }
960 
// Decompose \p Addr into the full MUBUF operand set, choosing between the
// addr64 (reg+reg) and plain offset addressing forms. Always succeeds unless
// the subtarget prefers flat instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Caller-provided GLC/SLC values are kept; unset ones default to 0.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Defaults for all remaining outputs; overwritten below as forms match.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
        Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // Constant doesn't fit in 32 bits; fall through to the generic forms.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
1034 
1035 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1036                                            SDValue &VAddr, SDValue &SOffset,
1037                                            SDValue &Offset, SDValue &GLC,
1038                                            SDValue &SLC, SDValue &TFE) const {
1039   SDValue Ptr, Offen, Idxen, Addr64;
1040 
1041   // addr64 bit was removed for volcanic islands.
1042   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1043     return false;
1044 
1045   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1046               GLC, SLC, TFE))
1047     return false;
1048 
1049   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1050   if (C->getSExtValue()) {
1051     SDLoc DL(Addr);
1052 
1053     const SITargetLowering& Lowering =
1054       *static_cast<const SITargetLowering*>(getTargetLowering());
1055 
1056     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1057     return true;
1058   }
1059 
1060   return false;
1061 }
1062 
1063 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1064                                            SDValue &VAddr, SDValue &SOffset,
1065                                            SDValue &Offset,
1066                                            SDValue &SLC) const {
1067   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1068   SDValue GLC, TFE;
1069 
1070   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1071 }
1072 
1073 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1074                                             SDValue &VAddr, SDValue &SOffset,
1075                                             SDValue &ImmOffset) const {
1076 
1077   SDLoc DL(Addr);
1078   MachineFunction &MF = CurDAG->getMachineFunction();
1079   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1080 
1081   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1082   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1083 
1084   // (add n0, c1)
1085   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1086     SDValue N0 = Addr.getOperand(0);
1087     SDValue N1 = Addr.getOperand(1);
1088 
1089     // Offsets in vaddr must be positive.
1090     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1091     if (isLegalMUBUFImmOffset(C1)) {
1092       VAddr = N0;
1093       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1094       return true;
1095     }
1096   }
1097 
1098   // (node)
1099   VAddr = Addr;
1100   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1101   return true;
1102 }
1103 
// Match MUBUF with everything folded into the resource descriptor: succeeds
// only when SelectMUBUF selected neither offen, idxen, nor addr64, in which
// case a full rsrc is built from the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Default data format combined with an all-ones size field.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}
1131 
1132 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1133                                            SDValue &Soffset, SDValue &Offset
1134                                            ) const {
1135   SDValue GLC, SLC, TFE;
1136 
1137   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1138 }
1139 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1140                                            SDValue &Soffset, SDValue &Offset,
1141                                            SDValue &GLC) const {
1142   SDValue SLC, TFE;
1143 
1144   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1145 }
1146 
// Split a constant buffer offset into a 12-bit immediate (ImmOffset) plus a
// remainder placed in SOffset, choosing the split so SOffset values can be
// shared between adjacent accesses.
void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  // Values up to 4095 fit entirely in the immediate field; Overflow stays 0.
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflow values can be encoded as an inline constant operand;
  // anything larger must be materialized with s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);
}
1181 
1182 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1183                                                     SDValue &SOffset,
1184                                                     SDValue &ImmOffset) const {
1185   SDLoc DL(Offset);
1186 
1187   if (!isa<ConstantSDNode>(Offset))
1188     return false;
1189 
1190   SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1191 
1192   return true;
1193 }
1194 
1195 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1196                                                      SDValue &SOffset,
1197                                                      SDValue &ImmOffset,
1198                                                      SDValue &VOffset) const {
1199   SDLoc DL(Offset);
1200 
1201   // Don't generate an unnecessary voffset for constant offsets.
1202   if (isa<ConstantSDNode>(Offset))
1203     return false;
1204 
1205   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1206     SDValue N0 = Offset.getOperand(0);
1207     SDValue N1 = Offset.getOperand(1);
1208     SelectMUBUFConstant(N1, SOffset, ImmOffset);
1209     VOffset = N0;
1210   } else {
1211     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1212     ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1213     VOffset = Offset;
1214   }
1215 
1216   return true;
1217 }
1218 
1219 ///
1220 /// \param EncodedOffset This is the immediate value that will be encoded
1221 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1222 ///        will be in units of dwords and on VI+ it will be units of bytes.
1223 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1224                                  int64_t EncodedOffset) {
1225   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1226      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1227 }
1228 
1229 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1230                                           SDValue &Offset, bool &Imm) const {
1231 
1232   // FIXME: Handle non-constant offsets.
1233   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1234   if (!C)
1235     return false;
1236 
1237   SDLoc SL(ByteOffsetNode);
1238   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1239   int64_t ByteOffset = C->getSExtValue();
1240   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1241       ByteOffset >> 2 : ByteOffset;
1242 
1243   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1244     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1245     Imm = true;
1246     return true;
1247   }
1248 
1249   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1250     return false;
1251 
1252   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1253     // 32-bit Immediates are supported on Sea Islands.
1254     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1255   } else {
1256     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1257     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1258                                             C32Bit), 0);
1259   }
1260   Imm = false;
1261   return true;
1262 }
1263 
1264 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1265                                      SDValue &Offset, bool &Imm) const {
1266 
1267   SDLoc SL(Addr);
1268   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1269     SDValue N0 = Addr.getOperand(0);
1270     SDValue N1 = Addr.getOperand(1);
1271 
1272     if (SelectSMRDOffset(N1, Offset, Imm)) {
1273       SBase = N0;
1274       return true;
1275     }
1276   }
1277   SBase = Addr;
1278   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1279   Imm = true;
1280   return true;
1281 }
1282 
1283 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1284                                        SDValue &Offset) const {
1285   bool Imm;
1286   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1287 }
1288 
1289 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1290                                          SDValue &Offset) const {
1291 
1292   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1293     return false;
1294 
1295   bool Imm;
1296   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1297     return false;
1298 
1299   return !Imm && isa<ConstantSDNode>(Offset);
1300 }
1301 
1302 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1303                                         SDValue &Offset) const {
1304   bool Imm;
1305   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1306          !isa<ConstantSDNode>(Offset);
1307 }
1308 
1309 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1310                                              SDValue &Offset) const {
1311   bool Imm;
1312   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1313 }
1314 
1315 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1316                                                SDValue &Offset) const {
1317   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1318     return false;
1319 
1320   bool Imm;
1321   if (!SelectSMRDOffset(Addr, Offset, Imm))
1322     return false;
1323 
1324   return !Imm && isa<ConstantSDNode>(Offset);
1325 }
1326 
1327 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1328                                               SDValue &Offset) const {
1329   bool Imm;
1330   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1331          !isa<ConstantSDNode>(Offset);
1332 }
1333 
// FIXME: This is incorrect and only enough to be able to compile.
// Select an addrspacecast to/from the flat address space by reinterpreting
// the pointer bits: truncate (64->32), zero-extend (32->64), or bitcast.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  // Report that real addrspacecast support is missing, then fall through to
  // the placeholder lowering below.
  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(
      *MF.getFunction(), "addrspacecast not implemented", DL.getDebugLoc());
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    // 64 -> 32: truncate by taking the low subregister.
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
      TargetOpcode::EXTRACT_SUBREG,
      DL,
      DestVT,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    // 32 -> 64: zero-extend by pairing the source with a zero high half.
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  // Same width: a plain bitcast suffices.
  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
1396 
1397 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
1398                                      uint32_t Offset, uint32_t Width) {
1399   // Transformation function, pack the offset and width of a BFE into
1400   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1401   // source, bits [5:0] contain the offset and bits [22:16] the width.
1402   uint32_t PackedVal = Offset | (Width << 16);
1403   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1404 
1405   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1406 }
1407 
1408 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1409   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1410   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1411   // Predicate: 0 < b <= c < 32
1412 
1413   const SDValue &Shl = N->getOperand(0);
1414   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1415   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1416 
1417   if (B && C) {
1418     uint32_t BVal = B->getZExtValue();
1419     uint32_t CVal = C->getZExtValue();
1420 
1421     if (0 < BVal && BVal <= CVal && CVal < 32) {
1422       bool Signed = N->getOpcode() == ISD::SRA;
1423       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1424 
1425       return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
1426                       CVal - BVal, 32 - CVal);
1427     }
1428   }
1429   return SelectCode(N);
1430 }
1431 
// Try to select AND/SRL/SRA nodes as S_BFE (scalar bit-field extract)
// patterns; falls back to the generated matcher when no pattern applies.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // A contiguous low mask: its popcount is the field width.
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
1485 
// Select a BRCOND: conditions usable via SCC go through tablegen; VALU
// conditions are masked with EXEC and branched on through VCC.
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR.  This means that the value '1' is always
  // written to the corresponding bit for results that are masked.  In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}
1515 
1516 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1517                                         SDValue &SrcMods) const {
1518 
1519   unsigned Mods = 0;
1520 
1521   Src = In;
1522 
1523   if (Src.getOpcode() == ISD::FNEG) {
1524     Mods |= SISrcMods::NEG;
1525     Src = Src.getOperand(0);
1526   }
1527 
1528   if (Src.getOpcode() == ISD::FABS) {
1529     Mods |= SISrcMods::ABS;
1530     Src = Src.getOperand(0);
1531   }
1532 
1533   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1534 
1535   return true;
1536 }
1537 
1538 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1539                                          SDValue &SrcMods) const {
1540   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1541   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1542 }
1543 
1544 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1545                                          SDValue &SrcMods, SDValue &Clamp,
1546                                          SDValue &Omod) const {
1547   SDLoc DL(In);
1548   // FIXME: Handle Clamp and Omod
1549   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1550   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1551 
1552   return SelectVOP3Mods(In, Src, SrcMods);
1553 }
1554 
1555 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1556                                            SDValue &SrcMods, SDValue &Clamp,
1557                                            SDValue &Omod) const {
1558   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1559 
1560   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1561                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1562                 cast<ConstantSDNode>(Omod)->isNullValue();
1563 }
1564 
1565 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1566                                               SDValue &SrcMods,
1567                                               SDValue &Omod) const {
1568   // FIXME: Handle Omod
1569   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1570 
1571   return SelectVOP3Mods(In, Src, SrcMods);
1572 }
1573 
1574 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1575                                                    SDValue &SrcMods,
1576                                                    SDValue &Clamp,
1577                                                    SDValue &Omod) const {
1578   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1579   return SelectVOP3Mods(In, Src, SrcMods);
1580 }
1581 
// Rewrites the pre-selection DAG in two ways: (1) moves frame-index values
// used as non-pointer operands of memory ops into a V_MOV_B32, so each memory
// instruction carries at most one frame-index operand; (2) replaces plain i64
// loads/stores with v2i32 loads/stores plus a bitcast.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      // NOTE(review): this assignment is dead — the loop below declares its
      // own `It`, shadowing this one. Harmless, but the line could go.
      It = EffectiveFI->use_begin();
    }

    // Walk all uses of the (possibly AssertZext-wrapped) frame index. The
    // iterator is advanced before any mutation because UpdateNodeOperands
    // below can invalidate the current use.
    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        // The pointer operand (operand 2 of a store, 1 of a load) is allowed
        // to stay a frame index; only non-pointer uses are rewritten.
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          // Materialize the frame index into a VGPR so the memory op no
          // longer references it directly.
          // NOTE(review): the mov wraps the raw FI, not EffectiveFI, so any
          // AssertZext is bypassed here — confirm that is intended.
          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
        Modified = true;
      }
    }
  }

  // Collect the nodes first, then mutate: replacing while walking allnodes()
  // would invalidate the iteration.
  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. We could instead promote i64 loads to v2i32 during DAG
      // legalization, but some places in the legalizer (ExpandUnalignedLoad)
      // assume loads of a legal type stay that type, so doing this promotion
      // early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    // Build the v2i32 load on the same chain/pointer/memoperand, bitcast the
    // value back to i64, and splice both results into the old load's uses.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    // Stores are updated in place: only the stored value changes (bitcast to
    // v2i32); chain, pointer and offset operands are kept.
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}
1695 
1696 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1697   const AMDGPUTargetLowering& Lowering =
1698     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1699   bool IsModified = false;
1700   do {
1701     IsModified = false;
1702     // Go over all selected nodes and try to fold them a bit more
1703     for (SDNode &Node : CurDAG->allnodes()) {
1704       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1705       if (!MachineNode)
1706         continue;
1707 
1708       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1709       if (ResNode != &Node) {
1710         ReplaceUses(&Node, ResNode);
1711         IsModified = true;
1712       }
1713     }
1714     CurDAG->RemoveDeadNodes();
1715   } while (IsModified);
1716 }
1717