1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //==-----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Defines an instruction selector for the AMDGPU target.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUInstrInfo.h"
16 #include "AMDGPUIntrinsicInfo.h"
17 #include "AMDGPUISelLowering.h" // For AMDGPUISD
18 #include "AMDGPUSubtarget.h"
19 #include "SIISelLowering.h"
20 #include "SIMachineFunctionInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/FunctionLoweringInfo.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/PseudoSourceValue.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SelectionDAGISel.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 
29 using namespace llvm;
30 
31 namespace llvm {
32 class R600InstrInfo;
33 }
34 
35 //===----------------------------------------------------------------------===//
36 // Instruction Selector Implementation
37 //===----------------------------------------------------------------------===//
38 
namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(TM, OptLevel) {}

  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  SDValue foldFrameIndex(SDValue N) const;
  // Returns true if \p N is a constant that can be encoded as an inline
  // immediate operand.
  bool isInlineImmediate(const SDNode *N) const;

  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  // Glues a write to m0 ahead of local-memory accesses (SI and newer).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern selectors for the various addressing modes; each returns
  // true on a successful match and fills in the out-parameters.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing.
  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source/output modifier matching.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom selection for nodes tablegen cannot match directly.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
}  // end anonymous namespace
151 
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  // Ownership of the returned pass transfers to the caller (pass manager).
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
158 
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before handing control to the
  // common SelectionDAG-based selection driver.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
163 
164 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
165 }
166 
167 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
168   const SIInstrInfo *TII
169     = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
170 
171   if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
172     return TII->isInlineConstant(C->getAPIntValue());
173 
174   if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
175     return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
176 
177   return false;
178 }
179 
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Ordinary machine instruction: look up the operand's register class in
    // the MC instruction description, skipping over the def operands.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operands alternate (value, subreg index) after the leading
    // register-class id, so derive the class from the super-class + subreg.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                              SubRegIdx);
  }
  }
}
214 
// For local-memory accesses on SOUTHERN_ISLANDS and newer, glue a copy that
// writes -1 ("max value") into m0 ahead of \p N; all other nodes are
// returned unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Re-create N's operand list with the glue value appended, so the m0 write
  // is ordered before the memory operation.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  // Mutates N in place rather than creating a new node.
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
239 
240 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
241   switch (NumVectorElts) {
242   case 1:
243     return AMDGPU::SReg_32RegClassID;
244   case 2:
245     return AMDGPU::SReg_64RegClassID;
246   case 4:
247     return AMDGPU::SReg_128RegClassID;
248   case 8:
249     return AMDGPU::SReg_256RegClassID;
250   case 16:
251     return AMDGPU::SReg_512RegClassID;
252   }
253 
254   llvm_unreachable("invalid vector size");
255 }
256 
// Top-level selection entry point: handles nodes that need custom expansion
// before falling back to the tablegen-generated matcher (SelectCode).
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics may access local memory, which requires m0 to be set up first;
  // glueCopyToM0 checks the address space itself and is a no-op otherwise.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction to a REG_SEQUENCE of 32-bit elements.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A one-element "vector" is just a copy into the right register class.
    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
              CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                        MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into a REG_SEQUENCE (generations after
    // NORTHERN_ISLANDS only).
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit non-inline constants as two S_MOV_B32 halves joined
    // by a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local loads/stores also need m0 set up; then fall through to the
    // generated matcher below.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These i32 ops can sometimes be expressed as a scalar bitfield extract.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  // Everything else is handled by the tablegen-generated matcher.
  SelectCode(N);
}
480 
481 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
482   if (!N->readMem())
483     return false;
484   if (CbId == -1)
485     return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
486 
487   return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
488 }
489 
490 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
491   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
492   const Instruction *Term = BB->getTerminator();
493   return Term->getMetadata("amdgpu.uniform") ||
494          Term->getMetadata("structurizecfg.uniform");
495 }
496 
// Human-readable pass name reported by the pass manager.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
500 
501 //===----------------------------------------------------------------------===//
502 // Complex Patterns
503 //===----------------------------------------------------------------------===//
504 
505 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
506                                                          SDValue& IntPtr) {
507   if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
508     IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
509                                        true);
510     return true;
511   }
512   return false;
513 }
514 
515 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
516     SDValue& BaseReg, SDValue &Offset) {
517   if (!isa<ConstantSDNode>(Addr)) {
518     BaseReg = Addr;
519     Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
520     return true;
521   }
522   return false;
523 }
524 
// Match a VTX_READ address: (add base, imm16), a bare 16-bit immediate
// (with the ZERO register as base), or a plain base with offset 0.
// Always succeeds via the fallback case.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                           SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                         MVT::i32);
      return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}
553 
554 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
555                                             SDValue &Offset) {
556   ConstantSDNode *C;
557   SDLoc DL(Addr);
558 
559   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
560     Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
561     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
562   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
563             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
564     Base = Addr.getOperand(0);
565     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
566   } else {
567     Base = Addr;
568     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
569   }
570 
571   return true;
572 }
573 
// Expand a 64-bit integer ADD/SUB into a 32-bit low/high pair
// (S_ADD_U32 + S_ADDC_U32 or S_SUB_U32 + S_SUBB_U32) with the carry passed
// between them as glue, then recombine the halves with a REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low op produces a glue result (value 1) that carries the carry/borrow
  // into the high op.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  // Reassemble the 64-bit result from the two halves.
  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
615 
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Each SelectVOP3Mods* call fills (src, src_modifiers); note that the
  // modifier word precedes its source value in Ops.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  // Results: the scaled value (VT) plus an i1 second output.
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
636 
637 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
638                                          unsigned OffsetBits) const {
639   if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
640       (OffsetBits == 8 && !isUInt<8>(Offset)))
641     return false;
642 
643   if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
644       Subtarget->unsafeDSOffsetFoldingEnabled())
645     return true;
646 
647   // On Southern Islands instruction with a negative base value and an offset
648   // don't seem to work.
649   return CurDAG->SignBitIsZero(Base);
650 }
651 
// Match a DS address as a base register plus a 16-bit immediate offset.
// Handles (add base, c), (sub c, x) by negating x, and a bare constant by
// moving zero into the base. Always succeeds via the fallback case.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real (machine) subtract used as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
712 
// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS read2/write2-style address: a base register plus two 8-bit
// offsets in dword (4-byte) units, where offset1 = offset0 + 1.
// Mirrors the case structure of SelectDS1Addr1Offset above.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords, hence the division by 4.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the real (machine) subtract used as the base.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Bare constant address: move zero into the base and fold the whole
    // address into the two offsets (must be 4-byte aligned).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
786 
787 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
788   return isUInt<12>(Imm->getZExtValue());
789 }
790 
// Decompose \p Addr into the operand set of a MUBUF instruction.  The
// offen/idxen/addr64 flag outputs record which addressing mode was chosen,
// and GLC/SLC default to 0 when the caller did not pre-set them.  Returns
// false only when the subtarget prefers FLAT instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Only fill in the cache-control bits the caller left unset.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Start from the plain offset addressing mode; the matches below
  // overwrite these defaults as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    // The constant fits in the 12-bit immediate offset field.
    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // Constant does not fit in 32 bits: fall through to the generic cases.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
866 
867 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
868                                            SDValue &VAddr, SDValue &SOffset,
869                                            SDValue &Offset, SDValue &GLC,
870                                            SDValue &SLC, SDValue &TFE) const {
871   SDValue Ptr, Offen, Idxen, Addr64;
872 
873   // addr64 bit was removed for volcanic islands.
874   if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
875     return false;
876 
877   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
878               GLC, SLC, TFE))
879     return false;
880 
881   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
882   if (C->getSExtValue()) {
883     SDLoc DL(Addr);
884 
885     const SITargetLowering& Lowering =
886       *static_cast<const SITargetLowering*>(getTargetLowering());
887 
888     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
889     return true;
890   }
891 
892   return false;
893 }
894 
895 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
896                                            SDValue &VAddr, SDValue &SOffset,
897                                            SDValue &Offset,
898                                            SDValue &SLC) const {
899   SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
900   SDValue GLC, TFE;
901 
902   return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
903 }
904 
905 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
906   if (auto FI = dyn_cast<FrameIndexSDNode>(N))
907     return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
908   return N;
909 }
910 
911 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
912                                             SDValue &VAddr, SDValue &SOffset,
913                                             SDValue &ImmOffset) const {
914 
915   SDLoc DL(Addr);
916   MachineFunction &MF = CurDAG->getMachineFunction();
917   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
918 
919   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
920   SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
921 
922   // (add n0, c1)
923   if (CurDAG->isBaseWithConstantOffset(Addr)) {
924     SDValue N0 = Addr.getOperand(0);
925     SDValue N1 = Addr.getOperand(1);
926 
927     // Offsets in vaddr must be positive.
928     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
929     if (isLegalMUBUFImmOffset(C1)) {
930       VAddr = foldFrameIndex(N0);
931       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
932       return true;
933     }
934   }
935 
936   // (node)
937   VAddr = foldFrameIndex(Addr);
938   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
939   return true;
940 }
941 
942 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
943                                            SDValue &SOffset, SDValue &Offset,
944                                            SDValue &GLC, SDValue &SLC,
945                                            SDValue &TFE) const {
946   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
947   const SIInstrInfo *TII =
948     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
949 
950   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
951               GLC, SLC, TFE))
952     return false;
953 
954   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
955       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
956       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
957     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
958                     APInt::getAllOnesValue(32).getZExtValue(); // Size
959     SDLoc DL(Addr);
960 
961     const SITargetLowering& Lowering =
962       *static_cast<const SITargetLowering*>(getTargetLowering());
963 
964     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
965     return true;
966   }
967   return false;
968 }
969 
970 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
971                                            SDValue &Soffset, SDValue &Offset
972                                            ) const {
973   SDValue GLC, SLC, TFE;
974 
975   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
976 }
977 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
978                                            SDValue &Soffset, SDValue &Offset,
979                                            SDValue &SLC) const {
980   SDValue GLC, TFE;
981 
982   return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
983 }
984 
// Split a constant buffer offset into a legal immediate offset (<= 4095)
// plus an SOffset remainder, preferring SOffset values that adjacent
// accesses can share.  Fails on SI/CI when a non-zero SOffset would be
// needed, due to a hardware address-clamping bug.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  // The immediate field holds at most 4095; larger values spill the
  // remainder into SOffset.
  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Overflow values up to 64 fit in an inline constant; anything larger
  // needs an s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}
1028 
1029 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
1030                                                     SDValue &SOffset,
1031                                                     SDValue &ImmOffset) const {
1032   SDLoc DL(Offset);
1033 
1034   if (!isa<ConstantSDNode>(Offset))
1035     return false;
1036 
1037   return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
1038 }
1039 
// Select the VOffset form of a buffer-intrinsic offset: a constant part is
// split into SOffset/ImmOffset and the variable part is left in VOffset.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  // (add n0, c1): fold a non-negative constant part when SelectMUBUFConstant
  // accepts it; the remaining base goes in VOffset.
  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Default: everything in VOffset with zero SOffset/ImmOffset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}
1073 
1074 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
1075                                     SDValue &VAddr,
1076                                     SDValue &SLC,
1077                                     SDValue &TFE) const {
1078   VAddr = Addr;
1079   TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
1080   return true;
1081 }
1082 
1083 ///
1084 /// \param EncodedOffset This is the immediate value that will be encoded
1085 ///        directly into the instruction.  On SI/CI the \p EncodedOffset
1086 ///        will be in units of dwords and on VI+ it will be units of bytes.
1087 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
1088                                  int64_t EncodedOffset) {
1089   return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1090      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
1091 }
1092 
1093 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1094                                           SDValue &Offset, bool &Imm) const {
1095 
1096   // FIXME: Handle non-constant offsets.
1097   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1098   if (!C)
1099     return false;
1100 
1101   SDLoc SL(ByteOffsetNode);
1102   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
1103   int64_t ByteOffset = C->getSExtValue();
1104   int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
1105       ByteOffset >> 2 : ByteOffset;
1106 
1107   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
1108     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1109     Imm = true;
1110     return true;
1111   }
1112 
1113   if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
1114     return false;
1115 
1116   if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
1117     // 32-bit Immediates are supported on Sea Islands.
1118     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
1119   } else {
1120     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1121     Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
1122                                             C32Bit), 0);
1123   }
1124   Imm = false;
1125   return true;
1126 }
1127 
1128 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
1129                                      SDValue &Offset, bool &Imm) const {
1130 
1131   SDLoc SL(Addr);
1132   if (CurDAG->isBaseWithConstantOffset(Addr)) {
1133     SDValue N0 = Addr.getOperand(0);
1134     SDValue N1 = Addr.getOperand(1);
1135 
1136     if (SelectSMRDOffset(N1, Offset, Imm)) {
1137       SBase = N0;
1138       return true;
1139     }
1140   }
1141   SBase = Addr;
1142   Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
1143   Imm = true;
1144   return true;
1145 }
1146 
1147 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
1148                                        SDValue &Offset) const {
1149   bool Imm;
1150   return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
1151 }
1152 
1153 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
1154                                          SDValue &Offset) const {
1155 
1156   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1157     return false;
1158 
1159   bool Imm;
1160   if (!SelectSMRD(Addr, SBase, Offset, Imm))
1161     return false;
1162 
1163   return !Imm && isa<ConstantSDNode>(Offset);
1164 }
1165 
1166 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
1167                                         SDValue &Offset) const {
1168   bool Imm;
1169   return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
1170          !isa<ConstantSDNode>(Offset);
1171 }
1172 
1173 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
1174                                              SDValue &Offset) const {
1175   bool Imm;
1176   return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
1177 }
1178 
1179 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
1180                                                SDValue &Offset) const {
1181   if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
1182     return false;
1183 
1184   bool Imm;
1185   if (!SelectSMRDOffset(Addr, Offset, Imm))
1186     return false;
1187 
1188   return !Imm && isa<ConstantSDNode>(Offset);
1189 }
1190 
1191 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
1192                                               SDValue &Offset) const {
1193   bool Imm;
1194   return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
1195          !isa<ConstantSDNode>(Offset);
1196 }
1197 
1198 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
1199                                             SDValue &Base,
1200                                             SDValue &Offset) const {
1201   SDLoc DL(Index);
1202 
1203   if (CurDAG->isBaseWithConstantOffset(Index)) {
1204     SDValue N0 = Index.getOperand(0);
1205     SDValue N1 = Index.getOperand(1);
1206     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
1207 
1208     // (add n0, c0)
1209     Base = N0;
1210     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
1211     return true;
1212   }
1213 
1214   if (isa<ConstantSDNode>(Index))
1215     return false;
1216 
1217   Base = Index;
1218   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1219   return true;
1220 }
1221 
1222 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
1223                                      SDValue Val, uint32_t Offset,
1224                                      uint32_t Width) {
1225   // Transformation function, pack the offset and width of a BFE into
1226   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1227   // source, bits [5:0] contain the offset and bits [22:16] the width.
1228   uint32_t PackedVal = Offset | (Width << 16);
1229   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
1230 
1231   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
1232 }
1233 
1234 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
1235   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
1236   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
1237   // Predicate: 0 < b <= c < 32
1238 
1239   const SDValue &Shl = N->getOperand(0);
1240   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
1241   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
1242 
1243   if (B && C) {
1244     uint32_t BVal = B->getZExtValue();
1245     uint32_t CVal = C->getZExtValue();
1246 
1247     if (0 < BVal && BVal <= CVal && CVal < 32) {
1248       bool Signed = N->getOpcode() == ISD::SRA;
1249       unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
1250 
1251       ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
1252                               32 - CVal));
1253       return;
1254     }
1255   }
1256   SelectCode(N);
1257 }
1258 
// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG nodes as S_BFE bitfield
// extracts; any pattern that doesn't match falls through to SelectCode.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // Width of the extract equals the number of mask bits.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Only the mask bits that survive the shift matter.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // Shift pairs are handled by the shared shl+srl/sra matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The extension VT gives the width of the signed extract.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched; use the generated matcher.
  SelectCode(N);
}
1334 
1335 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
1336   assert(N->getOpcode() == ISD::BRCOND);
1337   if (!N->hasOneUse())
1338     return false;
1339 
1340   SDValue Cond = N->getOperand(1);
1341   if (Cond.getOpcode() == ISD::CopyToReg)
1342     Cond = Cond.getOperand(2);
1343 
1344   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
1345     return false;
1346 
1347   MVT VT = Cond.getOperand(0).getSimpleValueType();
1348   if (VT == MVT::i32)
1349     return true;
1350 
1351   if (VT == MVT::i64) {
1352     auto ST = static_cast<const SISubtarget *>(Subtarget);
1353 
1354     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
1355     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
1356   }
1357 
1358   return false;
1359 }
1360 
1361 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
1362   SDValue Cond = N->getOperand(1);
1363 
1364   if (isCBranchSCC(N)) {
1365     // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
1366     SelectCode(N);
1367     return;
1368   }
1369 
1370   // The result of VOPC instructions is or'd against ~EXEC before it is
1371   // written to vcc or another SGPR.  This means that the value '1' is always
1372   // written to the corresponding bit for results that are masked.  In order
1373   // to correctly check against vccz, we need to and VCC with the EXEC
1374   // register in order to clear the value from the masked bits.
1375 
1376   SDLoc SL(N);
1377 
1378   SDNode *MaskedCond =
1379         CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
1380                                CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
1381                                Cond);
1382   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
1383                                      SDValue(MaskedCond, 0),
1384                                      SDValue()); // Passing SDValue() adds a
1385                                                  // glue output.
1386   CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
1387                        N->getOperand(2), // Basic Block
1388                        VCC.getValue(0),  // Chain
1389                        VCC.getValue(1)); // Glue
1390   return;
1391 }
1392 
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // FLAT-address cmpswaps are handled by the generated matcher.
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 MUBUF form when the subtarget supports it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      // Operand 2 of the memory node is the compare/swap value pair.
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the offset-only MUBUF form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched; let the generated matcher try.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand from the original node to the new machine
  // node so alias analysis keeps working.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the full src pair; extract just the data half.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
1458 
1459 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
1460                                         SDValue &SrcMods) const {
1461 
1462   unsigned Mods = 0;
1463 
1464   Src = In;
1465 
1466   if (Src.getOpcode() == ISD::FNEG) {
1467     Mods |= SISrcMods::NEG;
1468     Src = Src.getOperand(0);
1469   }
1470 
1471   if (Src.getOpcode() == ISD::FABS) {
1472     Mods |= SISrcMods::ABS;
1473     Src = Src.getOperand(0);
1474   }
1475 
1476   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
1477 
1478   return true;
1479 }
1480 
1481 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
1482                                          SDValue &SrcMods) const {
1483   bool Res = SelectVOP3Mods(In, Src, SrcMods);
1484   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
1485 }
1486 
1487 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
1488                                          SDValue &SrcMods, SDValue &Clamp,
1489                                          SDValue &Omod) const {
1490   SDLoc DL(In);
1491   // FIXME: Handle Clamp and Omod
1492   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
1493   Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
1494 
1495   return SelectVOP3Mods(In, Src, SrcMods);
1496 }
1497 
1498 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
1499                                            SDValue &SrcMods, SDValue &Clamp,
1500                                            SDValue &Omod) const {
1501   bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
1502 
1503   return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
1504                 cast<ConstantSDNode>(Clamp)->isNullValue() &&
1505                 cast<ConstantSDNode>(Omod)->isNullValue();
1506 }
1507 
1508 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
1509                                               SDValue &SrcMods,
1510                                               SDValue &Omod) const {
1511   // FIXME: Handle Omod
1512   Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1513 
1514   return SelectVOP3Mods(In, Src, SrcMods);
1515 }
1516 
1517 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
1518                                                    SDValue &SrcMods,
1519                                                    SDValue &Clamp,
1520                                                    SDValue &Omod) const {
1521   Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
1522   return SelectVOP3Mods(In, Src, SrcMods);
1523 }
1524 
1525 void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
1526   const AMDGPUTargetLowering& Lowering =
1527     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
1528   bool IsModified = false;
1529   do {
1530     IsModified = false;
1531     // Go over all selected nodes and try to fold them a bit more
1532     for (SDNode &Node : CurDAG->allnodes()) {
1533       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
1534       if (!MachineNode)
1535         continue;
1536 
1537       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
1538       if (ResNode != &Node) {
1539         ReplaceUses(&Node, ResNode);
1540         IsModified = true;
1541       }
1542     }
1543     CurDAG->RemoveDeadNodes();
1544   } while (IsModified);
1545 }
1546