1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
39 namespace {
40 
41 static bool isCBranchSCC(const SDNode *N) {
42   assert(N->getOpcode() == ISD::BRCOND);
43   if (!N->hasOneUse())
44     return false;
45 
46   SDValue Cond = N->getOperand(1);
47   if (Cond.getOpcode() == ISD::CopyToReg)
48     Cond = Cond.getOperand(2);
49   return Cond.getOpcode() == ISD::SETCC &&
50          Cond.getOperand(0).getValueType() == MVT::i32 &&
51 	 Cond.hasOneUse();
52 }
53 
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
///
/// Select() handles a set of opcodes by hand and defers everything else to the
/// tablegen-generated matcher pulled in from AMDGPUGenDAGISel.inc at the end of
/// the class body.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // It is re-assigned at the start of every runOnMachineFunction() call.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  // Caches the function's subtarget, then runs the generic selector.
  bool runOnMachineFunction(MachineFunction &MF) override;
  // Entry point for selecting a single node.
  void Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True when SITargetLowering::analyzeImmediate() returns 0 for N.
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  // Each splits an address into two result values (R1, R2); the 64-bit
  // variant builds its constants with MVT::i64 instead of MVT::i32.
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space helpers. checkType() compares the pointer's address space
  // with the requested one and returns false for a null pointer; it asserts
  // that the requested address space is non-zero.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  // Store predicates, classifying a memory node by the address space of the
  // IR value attached to its memory operand.
  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  // Load predicates, the counterparts of the store predicates above.
  // isConstantLoad() takes a constant-buffer id; -1 selects the generic
  // constant address space.
  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // True when the terminator of the current basic block carries
  // "amdgpu.uniform" or "structurizecfg.uniform" metadata.
  bool isUniformBr(const SDNode *N) const;

  // On Southern Islands and newer, glues a copy of -1 into M0 onto memory
  // nodes that access the local address space; otherwise returns N untouched.
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // NOTE(review): the selectors below are named after the DS, MUBUF, FLAT,
  // SMRD and VOP3 instruction families; presumably they are the complex
  // pattern matchers referenced from the target description. Most of their
  // definitions lie outside this part of the file.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Hand-written selection routines dispatched from Select().
  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
181 }  // end anonymous namespace
182 
183 /// \brief This pass converts a legalized DAG into a AMDGPU-specific
184 // DAG, ready for instruction scheduling.
185 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
186   return new AMDGPUDAGToDAGISel(TM);
187 }
188 
189 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
190     : SelectionDAGISel(TM) {}
191 
192 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
193   Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
194   return SelectionDAGISel::runOnMachineFunction(MF);
195 }
196 
197 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
198 }
199 
200 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
201   const SITargetLowering *TL
202       = static_cast<const SITargetLowering *>(getTargetLowering());
203   return TL->analyzeImmediate(N) == 0;
204 }
205 
206 /// \brief Determine the register class for \p OpNo
207 /// \returns The register class of the virtual register that will be used for
208 /// the given operand number \OpNo or NULL if the register class cannot be
209 /// determined.
210 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
211                                                           unsigned OpNo) const {
212   if (!N->isMachineOpcode())
213     return nullptr;
214 
215   switch (N->getMachineOpcode()) {
216   default: {
217     const MCInstrDesc &Desc =
218         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
219     unsigned OpIdx = Desc.getNumDefs() + OpNo;
220     if (OpIdx >= Desc.getNumOperands())
221       return nullptr;
222     int RegClass = Desc.OpInfo[OpIdx].RegClass;
223     if (RegClass == -1)
224       return nullptr;
225 
226     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
227   }
228   case AMDGPU::REG_SEQUENCE: {
229     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
230     const TargetRegisterClass *SuperRC =
231         Subtarget->getRegisterInfo()->getRegClass(RCID);
232 
233     SDValue SubRegOp = N->getOperand(OpNo + 1);
234     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
235     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
236                                                               SubRegIdx);
237   }
238   }
239 }
240 
241 bool AMDGPUDAGToDAGISel::SelectADDRParam(
242   SDValue Addr, SDValue& R1, SDValue& R2) {
243 
244   if (Addr.getOpcode() == ISD::FrameIndex) {
245     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
246       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
247       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
248     } else {
249       R1 = Addr;
250       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
251     }
252   } else if (Addr.getOpcode() == ISD::ADD) {
253     R1 = Addr.getOperand(0);
254     R2 = Addr.getOperand(1);
255   } else {
256     R1 = Addr;
257     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
258   }
259   return true;
260 }
261 
262 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
263   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
264       Addr.getOpcode() == ISD::TargetGlobalAddress) {
265     return false;
266   }
267   return SelectADDRParam(Addr, R1, R2);
268 }
269 
270 
271 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
272   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
273       Addr.getOpcode() == ISD::TargetGlobalAddress) {
274     return false;
275   }
276 
277   if (Addr.getOpcode() == ISD::FrameIndex) {
278     if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
279       R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
280       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
281     } else {
282       R1 = Addr;
283       R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
284     }
285   } else if (Addr.getOpcode() == ISD::ADD) {
286     R1 = Addr.getOperand(0);
287     R2 = Addr.getOperand(1);
288   } else {
289     R1 = Addr;
290     R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
291   }
292   return true;
293 }
294 
295 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
296   if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
297       !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
298                  AMDGPUAS::LOCAL_ADDRESS))
299     return N;
300 
301   const SITargetLowering& Lowering =
302       *static_cast<const SITargetLowering*>(getTargetLowering());
303 
304   // Write max value to m0 before each load operation
305 
306   SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
307                                  CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
308 
309   SDValue Glue = M0.getValue(1);
310 
311   SmallVector <SDValue, 8> Ops;
312   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
313      Ops.push_back(N->getOperand(i));
314   }
315   Ops.push_back(Glue);
316   CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
317 
318   return N;
319 }
320 
321 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
322   switch (NumVectorElts) {
323   case 1:
324     return AMDGPU::SReg_32RegClassID;
325   case 2:
326     return AMDGPU::SReg_64RegClassID;
327   case 4:
328     return AMDGPU::SReg_128RegClassID;
329   case 8:
330     return AMDGPU::SReg_256RegClassID;
331   case 16:
332     return AMDGPU::SReg_512RegClassID;
333   }
334 
335   llvm_unreachable("invalid vector size");
336 }
337 
/// Selects machine instructions for \p N.
///
/// A handful of opcodes are handled by hand below; every case that `break`s
/// out of the switch (and every opcode without a case) is passed on to the
/// generated SelectCode() matcher. Cases that `return` have already replaced
/// or rewritten the node themselves.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomic nodes get the M0 copy glued on before the opcode dispatch; plain
  // loads and stores get the same treatment in their own case below.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    // Only i64 on Southern Islands and newer is handled manually.
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  // Vector construction becomes a REG_SEQUENCE (or a COPY_TO_REGCLASS for a
  // single element) in a register class picked from the element count.
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element vector is just the scalar placed in the right class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    // Element i lands in slots 1 + 2*i (value) and 2 + 2*i (sub-register
    // index for channel i).
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    // A RegisterSDNode operand was seen: leave the node to SelectCode().
    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  // BUILD_PAIR of i64/i128 becomes a two-element REG_SEQUENCE on generations
  // newer than Northern Islands.
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  // 64-bit constants that are not inline immediates are materialized as two
  // S_MOV_B32 halves joined by a REG_SEQUENCE (Southern Islands and newer).
  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    // Low 32 bits go to sub0, high 32 bits to sub1.
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  // Loads and stores only get the M0 copy glued on; the actual selection is
  // left to SelectCode().
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  // CopyToReg is first passed through the SI lowering hook, then selected by
  // SelectCode() as usual.
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  // i32 and/shift/sign-extend-in-reg nodes on Southern Islands and newer are
  // handed to SelectS_BFE().
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Everything that fell out of the switch goes to the generated matcher.
  SelectCode(N);
}
561 
562 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
563   assert(AS != 0 && "Use checkPrivateAddress instead.");
564   if (!Ptr)
565     return false;
566 
567   return Ptr->getType()->getPointerAddressSpace() == AS;
568 }
569 
570 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
571   if (Op->getPseudoValue())
572     return true;
573 
574   if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
575     return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
576 
577   return false;
578 }
579 
580 bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
581   if (!N->writeMem())
582     return false;
583   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
584 }
585 
586 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
587   const Value *MemVal = N->getMemOperand()->getValue();
588   return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
589           !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
590           !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
591 }
592 
593 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
594   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
595 }
596 
597 bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
598   if (!N->writeMem())
599     return false;
600   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
601 }
602 
603 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
604   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
605 }
606 
607 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
608   if (!N->readMem())
609     return false;
610   const Value *MemVal = N->getMemOperand()->getValue();
611   if (CbId == -1)
612     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
613 
614   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
615 }
616 
617 bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
618   if (!N->readMem())
619     return false;
620   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
621     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
622       return !isa<GlobalValue>(
623         GetUnderlyingObject(N->getMemOperand()->getValue(),
624 	CurDAG->getDataLayout()));
625 
626     //TODO: Why do we need this?
627     if (N->getMemoryVT().bitsLT(MVT::i32))
628       return true;
629   }
630 
631   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
632 }
633 
634 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
635   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
636 }
637 
638 bool AMDGPUDAGToDAGISel::isLocalLoad(const  LoadSDNode *N) const {
639   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
640 }
641 
642 bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
643   if (!N->readMem())
644     return false;
645   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
646 }
647 
648 bool AMDGPUDAGToDAGISel::isRegionLoad(const  LoadSDNode *N) const {
649   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
650 }
651 
652 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
653   MachineMemOperand *MMO = N->getMemOperand();
654   if (checkPrivateAddress(N->getMemOperand())) {
655     if (MMO) {
656       const PseudoSourceValue *PSV = MMO->getPseudoValue();
657       if (PSV && PSV->isConstantPool()) {
658         return true;
659       }
660     }
661   }
662   return false;
663 }
664 
665 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
666   if (checkPrivateAddress(N->getMemOperand())) {
667     // Check to make sure we are not a constant pool load or a constant load
668     // that is marked as a private load
669     if (isCPLoad(N) || isConstantLoad(N, -1)) {
670       return false;
671     }
672   }
673 
674   const Value *MemVal = N->getMemOperand()->getValue();
675   return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
676     !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
677     !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
678     !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
679     !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
680     !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
681     !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
682 }
683 
684 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
685   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
686   const Instruction *Term = BB->getTerminator();
687   return Term->getMetadata("amdgpu.uniform") ||
688          Term->getMetadata("structurizecfg.uniform");
689 }
690 
691 const char *AMDGPUDAGToDAGISel::getPassName() const {
692   return "AMDGPU DAG->DAG Pattern Instruction Selection";
693 }
694 
695 //===----------------------------------------------------------------------===//
696 // Complex Patterns
697 //===----------------------------------------------------------------------===//
698 
699 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
700                                                          SDValue& IntPtr) {
701   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
702     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
703                                        true);
704     return true;
705   }
706   return false;
707 }
708 
709 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
710     SDValue& BaseReg, SDValue &Offset) {
711   if (!isa<ConstantSDNode>(Addr)) {
712     BaseReg = Addr;
713     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
714     return true;
715   }
716   return false;
717 }
718 
719 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
720                                            SDValue &Offset) {
721   ConstantSDNode *IMMOffset;
722 
723   if (Addr.getOpcode() == ISD::ADD
724       && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
725       && isInt<16>(IMMOffset->getZExtValue())) {
726 
727       Base = Addr.getOperand(0);
728       Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
729                                          MVT::i32);
730       return true;
731   // If the pointer address is constant, we can move it to the offset field.
732   } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
733              && isInt<16>(IMMOffset->getZExtValue())) {
734     Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
735                                   SDLoc(CurDAG->getEntryNode()),
736                                   AMDGPU::ZERO, MVT::i32);
737     Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
738                                        MVT::i32);
739     return true;
740   }
741 
742   // Default case, no offset
743   Base = Addr;
744   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
745   return true;
746 }
747 
748 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
749                                             SDValue &Offset) {
750   ConstantSDNode *C;
751   SDLoc DL(Addr);
752 
753   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
754     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
755     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
756   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
757             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
758     Base = Addr.getOperand(0);
759     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
760   } else {
761     Base = Addr;
762     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
763   }
764 
765   return true;
766 }
767 
768 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
769   SDLoc DL(N);
770   SDValue LHS = N->getOperand(0);
771   SDValue RHS = N->getOperand(1);
772 
773   bool IsAdd = (N->getOpcode() == ISD::ADD);
774 
775   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
776   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
777 
778   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
779                                        DL, MVT::i32, LHS, Sub0);
780   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
781                                        DL, MVT::i32, LHS, Sub1);
782 
783   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
784                                        DL, MVT::i32, RHS, Sub0);
785   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
786                                        DL, MVT::i32, RHS, Sub1);
787 
788   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
789   SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
790 
791 
792   unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
793   unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
794 
795   SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
796   SDValue Carry(AddLo, 1);
797   SDNode *AddHi
798     = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
799                              SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
800 
801   SDValue Args[5] = {
802     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
803     SDValue(AddLo,0),
804     Sub0,
805     SDValue(AddHi,0),
806     Sub1,
807   };
808   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
809 }
810 
811 // We need to handle this here because tablegen doesn't support matching
812 // instructions with multiple outputs.
813 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
814   SDLoc SL(N);
815   EVT VT = N->getValueType(0);
816 
817   assert(VT == MVT::f32 || VT == MVT::f64);
818 
819   unsigned Opc
820     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
821 
822   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
823   // omod
824   SDValue Ops[8];
825 
826   SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
827   SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
828   SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
829   CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
830 }
831 
832 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
833                                          unsigned OffsetBits) const {
834   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
835       (OffsetBits == 8 && !isUInt<8>(Offset)))
836     return false;
837 
838   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
839       Subtarget->unsafeDSOffsetFoldingEnabled())
840     return true;
841 
842   // On Southern Islands instruction with a negative base value and an offset
843   // don't seem to work.
844   return CurDAG->SignBitIsZero(Base);
845 }
846 
847 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
848                                               SDValue &Offset) const {
849   SDLoc DL(Addr);
850   if (CurDAG->isBaseWithConstantOffset(Addr)) {
851     SDValue N0 = Addr.getOperand(0);
852     SDValue N1 = Addr.getOperand(1);
853     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
854     if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
855       // (add n0, c0)
856       Base = N0;
857       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
858       return true;
859     }
860   } else if (Addr.getOpcode() == ISD::SUB) {
861     // sub C, x -> add (sub 0, x), C
862     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
863       int64_t ByteOffset = C->getSExtValue();
864       if (isUInt<16>(ByteOffset)) {
865         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
866 
867         // XXX - This is kind of hacky. Create a dummy sub node so we can check
868         // the known bits in isDSOffsetLegal. We need to emit the selected node
869         // here, so this is thrown away.
870         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
871                                       Zero, Addr.getOperand(1));
872 
873         if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
874           MachineSDNode *MachineSub
875             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
876                                      Zero, Addr.getOperand(1));
877 
878           Base = SDValue(MachineSub, 0);
879           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
880           return true;
881         }
882       }
883     }
884   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
885     // If we have a constant address, prefer to put the constant into the
886     // offset. This can save moves to load the constant address since multiple
887     // operations can share the zero base address register, and enables merging
888     // into read2 / write2 instructions.
889 
890     SDLoc DL(Addr);
891 
892     if (isUInt<16>(CAddr->getZExtValue())) {
893       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
894       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
895                                  DL, MVT::i32, Zero);
896       Base = SDValue(MovZero, 0);
897       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
898       return true;
899     }
900   }
901 
902   // default case
903   Base = Addr;
904   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
905   return true;
906 }
907 
908 // TODO: If offset is too big, put low 16-bit into offset.
909 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
910                                                    SDValue &Offset0,
911                                                    SDValue &Offset1) const {
912   SDLoc DL(Addr);
913 
914   if (CurDAG->isBaseWithConstantOffset(Addr)) {
915     SDValue N0 = Addr.getOperand(0);
916     SDValue N1 = Addr.getOperand(1);
917     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
918     unsigned DWordOffset0 = C1->getZExtValue() / 4;
919     unsigned DWordOffset1 = DWordOffset0 + 1;
920     // (add n0, c0)
921     if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
922       Base = N0;
923       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
924       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
925       return true;
926     }
927   } else if (Addr.getOpcode() == ISD::SUB) {
928     // sub C, x -> add (sub 0, x), C
929     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
930       unsigned DWordOffset0 = C->getZExtValue() / 4;
931       unsigned DWordOffset1 = DWordOffset0 + 1;
932 
933       if (isUInt<8>(DWordOffset0)) {
934         SDLoc DL(Addr);
935         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
936 
937         // XXX - This is kind of hacky. Create a dummy sub node so we can check
938         // the known bits in isDSOffsetLegal. We need to emit the selected node
939         // here, so this is thrown away.
940         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
941                                       Zero, Addr.getOperand(1));
942 
943         if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
944           MachineSDNode *MachineSub
945             = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
946                                      Zero, Addr.getOperand(1));
947 
948           Base = SDValue(MachineSub, 0);
949           Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
950           Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
951           return true;
952         }
953       }
954     }
955   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
956     unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
957     unsigned DWordOffset1 = DWordOffset0 + 1;
958     assert(4 * DWordOffset0 == CAddr->getZExtValue());
959 
960     if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
961       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
962       MachineSDNode *MovZero
963         = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
964                                  DL, MVT::i32, Zero);
965       Base = SDValue(MovZero, 0);
966       Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
967       Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
968       return true;
969     }
970   }
971 
972   // default case
973   Base = Addr;
974   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
975   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
976   return true;
977 }
978 
979 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
980   return isUInt<12>(Imm->getZExtValue());
981 }
982 
983 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
984                                      SDValue &VAddr, SDValue &SOffset,
985                                      SDValue &Offset, SDValue &Offen,
986                                      SDValue &Idxen, SDValue &Addr64,
987                                      SDValue &GLC, SDValue &SLC,
988                                      SDValue &TFE) const {
989   // Subtarget prefers to use flat instruction
990   if (Subtarget->useFlatForGlobal())
991     return false;
992 
993   SDLoc DL(Addr);
994 
995   if (!GLC.getNode())
996     GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
997   if (!SLC.getNode())
998     SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
999   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
1000 
1001   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1002   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1003   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1004   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1005 
1006   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1007     SDValue N0 = Addr.getOperand(0);
1008     SDValue N1 = Addr.getOperand(1);
1009     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1010 
1011     if (N0.getOpcode() == ISD::ADD) {
1012       // (add (add N2, N3), C1) -> addr64
1013       SDValue N2 = N0.getOperand(0);
1014       SDValue N3 = N0.getOperand(1);
1015       Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1016       Ptr = N2;
1017       VAddr = N3;
1018     } else {
1019 
1020       // (add N0, C1) -> offset
1021       VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1022       Ptr = N0;
1023     }
1024 
1025     if (isLegalMUBUFImmOffset(C1)) {
1026       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1027       return true;
1028     }
1029 
1030     if (isUInt<32>(C1->getZExtValue())) {
1031       // Illegal offset, store it in soffset.
1032       Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1033       SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1034                    CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
1035                         0);
1036       return true;
1037     }
1038   }
1039 
1040   if (Addr.getOpcode() == ISD::ADD) {
1041     // (add N0, N1) -> addr64
1042     SDValue N0 = Addr.getOperand(0);
1043     SDValue N1 = Addr.getOperand(1);
1044     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1045     Ptr = N0;
1046     VAddr = N1;
1047     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1048     return true;
1049   }
1050 
1051   // default case -> offset
1052   VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1053   Ptr = Addr;
1054   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1055 
1056   return true;
1057 }
1058 
1059 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1060                                            SDValue &VAddr, SDValue &SOffset,
1061                                            SDValue &Offset, SDValue &GLC,
1062                                            SDValue &SLC, SDValue &TFE) const {
1063   SDValue Ptr, Offen, Idxen, Addr64;
1064 
1065   // addr64 bit was removed for volcanic islands.
1066   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
1067     return false;
1068 
1069   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1070               GLC, SLC, TFE))
1071     return false;
1072 
1073   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
1074   if (C->getSExtValue()) {
1075     SDLoc DL(Addr);
1076 
1077     const SITargetLowering& Lowering =
1078       *static_cast<const SITargetLowering*>(getTargetLowering());
1079 
1080     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
1081     return true;
1082   }
1083 
1084   return false;
1085 }
1086 
1087 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1088                                            SDValue &VAddr, SDValue &SOffset,
1089                                            SDValue &Offset,
1090                                            SDValue &SLC) const {
1091   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
1092   SDValue GLC, TFE;
1093 
1094   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
1095 }
1096 
1097 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
1098                                             SDValue &VAddr, SDValue &SOffset,
1099                                             SDValue &ImmOffset) const {
1100 
1101   SDLoc DL(Addr);
1102   MachineFunction &MF = CurDAG->getMachineFunction();
1103   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
1104 
1105   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1106   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
1107 
1108   // (add n0, c1)
1109   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1110     SDValue N0 = Addr.getOperand(0);
1111     SDValue N1 = Addr.getOperand(1);
1112 
1113     // Offsets in vaddr must be positive.
1114     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1115     if (isLegalMUBUFImmOffset(C1)) {
1116       VAddr = N0;
1117       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
1118       return true;
1119     }
1120   }
1121 
1122   // (node)
1123   VAddr = Addr;
1124   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1125   return true;
1126 }
1127 
1128 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1129                                            SDValue &SOffset, SDValue &Offset,
1130                                            SDValue &GLC, SDValue &SLC,
1131                                            SDValue &TFE) const {
1132   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1133   const SIInstrInfo *TII =
1134     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
1135 
1136   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
1137               GLC, SLC, TFE))
1138     return false;
1139 
1140   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1141       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1142       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1143     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1144                     APInt::getAllOnesValue(32).getZExtValue(); // Size
1145     SDLoc DL(Addr);
1146 
1147     const SITargetLowering& Lowering =
1148       *static_cast<const SITargetLowering*>(getTargetLowering());
1149 
1150     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
1151     return true;
1152   }
1153   return false;
1154 }
1155 
1156 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1157                                            SDValue &Soffset, SDValue &Offset
1158                                            ) const {
1159   SDValue GLC, SLC, TFE;
1160 
1161   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1162 }
1163 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1164                                            SDValue &Soffset, SDValue &Offset,
1165                                            SDValue &SLC) const {
1166   SDValue GLC, TFE;
1167 
1168   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
1169 }
1170 
1171 void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
1172                                              SDValue &SOffset,
1173                                              SDValue &ImmOffset) const {
1174   SDLoc DL(Constant);
1175   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
1176   uint32_t Overflow = 0;
1177 
1178   if (Imm >= 4096) {
1179     if (Imm <= 4095 + 64) {
1180       // Use an SOffset inline constant for 1..64
1181       Overflow = Imm - 4095;
1182       Imm = 4095;
1183     } else {
1184       // Try to keep the same value in SOffset for adjacent loads, so that
1185       // the corresponding register contents can be re-used.
1186       //
1187       // Load values with all low-bits set into SOffset, so that a larger
1188       // range of values can be covered using s_movk_i32
1189       uint32_t High = (Imm + 1) & ~4095;
1190       uint32_t Low = (Imm + 1) & 4095;
1191       Imm = Low;
1192       Overflow = High - 1;
1193     }
1194   }
1195 
1196   ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);
1197 
1198   if (Overflow <= 64)
1199     SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
1200   else
1201     SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
1202                       CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
1203                       0);
1204 }
1205 
1206 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1207                                                     SDValue &SOffset,
1208                                                     SDValue &ImmOffset) const {
1209   SDLoc DL(Offset);
1210 
1211   if (!isa<ConstantSDNode>(Offset))
1212     return false;
1213 
1214   SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1215 
1216   return true;
1217 }
1218 
1219 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
1220                                                      SDValue &SOffset,
1221                                                      SDValue &ImmOffset,
1222                                                      SDValue &VOffset) const {
1223   SDLoc DL(Offset);
1224 
1225   // Don't generate an unnecessary voffset for constant offsets.
1226   if (isa<ConstantSDNode>(Offset))
1227     return false;
1228 
1229   if (CurDAG->isBaseWithConstantOffset(Offset)) {
1230     SDValue N0 = Offset.getOperand(0);
1231     SDValue N1 = Offset.getOperand(1);
1232     SelectMUBUFConstant(N1, SOffset, ImmOffset);
1233     VOffset = N0;
1234   } else {
1235     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1236     ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
1237     VOffset = Offset;
1238   }
1239 
1240   return true;
1241 }
1242 
1243 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1244                                     SDValue &VAddr,
1245                                     SDValue &SLC,
1246                                     SDValue &TFE) const {
1247   VAddr = Addr;
1248   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1249   return true;
1250 }
1251 
1252 ///
1253 /// \param EncodedOffset This is the immediate value that will be encoded
1254 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1255 ///        will be in units of dwords and on VI+ it will be units of bytes.
1256 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1257                                  int64_t EncodedOffset) {
1258   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1259      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1260 }
1261 
1262 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1263                                           SDValue &Offset, bool &Imm) const {
1264 
1265   // FIXME: Handle non-constant offsets.
1266   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1267   if (!C)
1268     return false;
1269 
1270   SDLoc SL(ByteOffsetNode);
1271   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1272   int64_t ByteOffset = C->getSExtValue();
1273   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1274       ByteOffset >> 2 : ByteOffset;
1275 
1276   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1277     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1278     Imm = true;
1279     return true;
1280   }
1281 
1282   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1283     return false;
1284 
1285   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1286     // 32-bit Immediates are supported on Sea Islands.
1287     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1288   } else {
1289     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1290     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1291                                             C32Bit), 0);
1292   }
1293   Imm = false;
1294   return true;
1295 }
1296 
1297 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1298                                      SDValue &Offset, bool &Imm) const {
1299 
1300   SDLoc SL(Addr);
1301   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1302     SDValue N0 = Addr.getOperand(0);
1303     SDValue N1 = Addr.getOperand(1);
1304 
1305     if (SelectSMRDOffset(N1, Offset, Imm)) {
1306       SBase = N0;
1307       return true;
1308     }
1309   }
1310   SBase = Addr;
1311   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1312   Imm = true;
1313   return true;
1314 }
1315 
1316 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1317                                        SDValue &Offset) const {
1318   bool Imm;
1319   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1320 }
1321 
1322 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1323                                          SDValue &Offset) const {
1324 
1325   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1326     return false;
1327 
1328   bool Imm;
1329   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1330     return false;
1331 
1332   return !Imm && isa<ConstantSDNode>(Offset);
1333 }
1334 
1335 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1336                                         SDValue &Offset) const {
1337   bool Imm;
1338   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1339          !isa<ConstantSDNode>(Offset);
1340 }
1341 
1342 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1343                                              SDValue &Offset) const {
1344   bool Imm;
1345   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1346 }
1347 
1348 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1349                                                SDValue &Offset) const {
1350   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1351     return false;
1352 
1353   bool Imm;
1354   if (!SelectSMRDOffset(Addr, Offset, Imm))
1355     return false;
1356 
1357   return !Imm && isa<ConstantSDNode>(Offset);
1358 }
1359 
1360 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1361                                               SDValue &Offset) const {
1362   bool Imm;
1363   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1364          !isa<ConstantSDNode>(Offset);
1365 }
1366 
1367 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1368                                      SDValue Val, uint32_t Offset,
1369                                      uint32_t Width) {
1370   // Transformation function, pack the offset and width of a BFE into
1371   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1372   // source, bits [5:0] contain the offset and bits [22:16] the width.
1373   uint32_t PackedVal = Offset | (Width << 16);
1374   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1375 
1376   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1377 }
1378 
1379 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1380   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1381   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1382   // Predicate: 0 < b <= c < 32
1383 
1384   const SDValue &Shl = N->getOperand(0);
1385   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1386   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1387 
1388   if (B && C) {
1389     uint32_t BVal = B->getZExtValue();
1390     uint32_t CVal = C->getZExtValue();
1391 
1392     if (0 < BVal && BVal <= CVal && CVal < 32) {
1393       bool Signed = N->getOpcode() == ISD::SRA;
1394       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1395 
1396       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1397                               32 - CVal));
1398       return;
1399     }
1400   }
1401   SelectCode(N);
1402 }
1403 
1404 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
1405   switch (N->getOpcode()) {
1406   case ISD::AND:
1407     if (N->getOperand(0).getOpcode() == ISD::SRL) {
1408       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
1409       // Predicate: isMask(mask)
1410       const SDValue &Srl = N->getOperand(0);
1411       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
1412       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
1413 
1414       if (Shift && Mask) {
1415         uint32_t ShiftVal = Shift->getZExtValue();
1416         uint32_t MaskVal = Mask->getZExtValue();
1417 
1418         if (isMask_32(MaskVal)) {
1419           uint32_t WidthVal = countPopulation(MaskVal);
1420 
1421           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1422                                   Srl.getOperand(0), ShiftVal, WidthVal));
1423           return;
1424         }
1425       }
1426     }
1427     break;
1428   case ISD::SRL:
1429     if (N->getOperand(0).getOpcode() == ISD::AND) {
1430       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
1431       // Predicate: isMask(mask >> b)
1432       const SDValue &And = N->getOperand(0);
1433       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
1434       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
1435 
1436       if (Shift && Mask) {
1437         uint32_t ShiftVal = Shift->getZExtValue();
1438         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
1439 
1440         if (isMask_32(MaskVal)) {
1441           uint32_t WidthVal = countPopulation(MaskVal);
1442 
1443           ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
1444                                   And.getOperand(0), ShiftVal, WidthVal));
1445           return;
1446         }
1447       }
1448     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
1449       SelectS_BFEFromShifts(N);
1450       return;
1451     }
1452     break;
1453   case ISD::SRA:
1454     if (N->getOperand(0).getOpcode() == ISD::SHL) {
1455       SelectS_BFEFromShifts(N);
1456       return;
1457     }
1458     break;
1459 
1460   case ISD::SIGN_EXTEND_INREG: {
1461     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
1462     SDValue Src = N->getOperand(0);
1463     if (Src.getOpcode() != ISD::SRL)
1464       break;
1465 
1466     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1467     if (!Amt)
1468       break;
1469 
1470     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
1471     ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
1472                             Amt->getZExtValue(), Width));
1473     return;
1474   }
1475   }
1476 
1477   SelectCode(N);
1478 }
1479 
1480 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1481   SDValue Cond = N->getOperand(1);
1482 
1483   if (isCBranchSCC(N)) {
1484     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1485     SelectCode(N);
1486     return;
1487   }
1488 
1489   // The result of VOPC instructions is or'd against ~EXEC before it is
1490   // written to vcc or another SGPR.  This means that the value '1' is always
1491   // written to the corresponding bit for results that are masked.  In order
1492   // to correctly check against vccz, we need to and VCC with the EXEC
1493   // register in order to clear the value from the masked bits.
1494 
1495   SDLoc SL(N);
1496 
1497   SDNode *MaskedCond =
1498         CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1499                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1500                                Cond);
1501   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
1502                                      SDValue(MaskedCond, 0),
1503                                      SDValue()); // Passing SDValue() adds a
1504                                                  // glue output.
1505   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1506                        N->getOperand(2), // Basic Block
1507                        VCC.getValue(0),  // Chain
1508                        VCC.getValue(1)); // Glue
1509   return;
1510 }
1511 
1512 // This is here because there isn't a way to use the generated sub0_sub1 as the
1513 // subreg index to EXTRACT_SUBREG in tablegen.
1514 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
1515   MemSDNode *Mem = cast<MemSDNode>(N);
1516   unsigned AS = Mem->getAddressSpace();
1517   if (AS == AMDGPUAS::FLAT_ADDRESS) {
1518     SelectCode(N);
1519     return;
1520   }
1521 
1522   MVT VT = N->getSimpleValueType(0);
1523   bool Is32 = (VT == MVT::i32);
1524   SDLoc SL(N);
1525 
1526   MachineSDNode *CmpSwap = nullptr;
1527   if (Subtarget->hasAddr64()) {
1528     SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
1529 
1530     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
1531       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
1532         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
1533       SDValue CmpVal = Mem->getOperand(2);
1534 
1535       // XXX - Do we care about glue operands?
1536 
1537       SDValue Ops[] = {
1538         CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1539       };
1540 
1541       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1542     }
1543   }
1544 
1545   if (!CmpSwap) {
1546     SDValue SRsrc, SOffset, Offset, SLC;
1547     if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
1548       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
1549         AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;
1550 
1551       SDValue CmpVal = Mem->getOperand(2);
1552       SDValue Ops[] = {
1553         CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
1554       };
1555 
1556       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
1557     }
1558   }
1559 
1560   if (!CmpSwap) {
1561     SelectCode(N);
1562     return;
1563   }
1564 
1565   MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
1566   *MMOs = Mem->getMemOperand();
1567   CmpSwap->setMemRefs(MMOs, MMOs + 1);
1568 
1569   unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
1570   SDValue Extract
1571     = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));
1572 
1573   ReplaceUses(SDValue(N, 0), Extract);
1574   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
1575   CurDAG->RemoveDeadNode(N);
1576 }
1577 
1578 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1579                                         SDValue &SrcMods) const {
1580 
1581   unsigned Mods = 0;
1582 
1583   Src = In;
1584 
1585   if (Src.getOpcode() == ISD::FNEG) {
1586     Mods |= SISrcMods::NEG;
1587     Src = Src.getOperand(0);
1588   }
1589 
1590   if (Src.getOpcode() == ISD::FABS) {
1591     Mods |= SISrcMods::ABS;
1592     Src = Src.getOperand(0);
1593   }
1594 
1595   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1596 
1597   return true;
1598 }
1599 
1600 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1601                                          SDValue &SrcMods) const {
1602   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1603   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1604 }
1605 
1606 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1607                                          SDValue &SrcMods, SDValue &Clamp,
1608                                          SDValue &Omod) const {
1609   SDLoc DL(In);
1610   // FIXME: Handle Clamp and Omod
1611   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1612   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1613 
1614   return SelectVOP3Mods(In, Src, SrcMods);
1615 }
1616 
1617 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1618                                            SDValue &SrcMods, SDValue &Clamp,
1619                                            SDValue &Omod) const {
1620   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1621 
1622   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1623                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1624                 cast<ConstantSDNode>(Omod)->isNullValue();
1625 }
1626 
1627 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1628                                               SDValue &SrcMods,
1629                                               SDValue &Omod) const {
1630   // FIXME: Handle Omod
1631   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1632 
1633   return SelectVOP3Mods(In, Src, SrcMods);
1634 }
1635 
1636 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1637                                                    SDValue &SrcMods,
1638                                                    SDValue &Clamp,
1639                                                    SDValue &Omod) const {
1640   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1641   return SelectVOP3Mods(In, Src, SrcMods);
1642 }
1643 
1644 void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
1645   MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();
1646 
1647   // Handle the perverse case where a frame index is being stored. We don't
1648   // want to see multiple frame index operands on the same instruction since
1649   // it complicates things and violates some assumptions about frame index
1650   // lowering.
1651   for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
1652        I != E; ++I) {
1653     SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
1654 
1655     // It's possible that we have a frame index defined in the function that
1656     // isn't used in this block.
1657     if (FI.use_empty())
1658       continue;
1659 
1660     // Skip over the AssertZext inserted during lowering.
1661     SDValue EffectiveFI = FI;
1662     auto It = FI->use_begin();
1663     if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
1664       EffectiveFI = SDValue(*It, 0);
1665       It = EffectiveFI->use_begin();
1666     }
1667 
1668     for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
1669       SDUse &Use = It.getUse();
1670       SDNode *User = Use.getUser();
1671       unsigned OpIdx = It.getOperandNo();
1672       ++It;
1673 
1674       if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
1675         unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
1676         if (OpIdx == PtrIdx)
1677           continue;
1678 
1679         unsigned OpN = M->getNumOperands();
1680         SDValue NewOps[8];
1681 
1682         assert(OpN < array_lengthof(NewOps));
1683         for (unsigned Op = 0; Op != OpN; ++Op) {
1684           if (Op != OpIdx) {
1685             NewOps[Op] = M->getOperand(Op);
1686             continue;
1687           }
1688 
1689           MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1690                                                       SDLoc(M), MVT::i32, FI);
1691           NewOps[Op] = SDValue(Mov, 0);
1692         }
1693 
1694         CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
1695       }
1696     }
1697   }
1698 }
1699 
1700 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1701   const AMDGPUTargetLowering& Lowering =
1702     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1703   bool IsModified = false;
1704   do {
1705     IsModified = false;
1706     // Go over all selected nodes and try to fold them a bit more
1707     for (SDNode &Node : CurDAG->allnodes()) {
1708       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1709       if (!MachineNode)
1710         continue;
1711 
1712       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1713       if (ResNode != &Node) {
1714         ReplaceUses(&Node, ResNode);
1715         IsModified = true;
1716       }
1717     }
1718     CurDAG->RemoveDeadNodes();
1719   } while (IsModified);
1720 }
1721