1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPURegisterInfo.h" 20 #include "AMDGPUSubtarget.h" 21 #include "SIDefines.h" 22 #include "SIISelLowering.h" 23 #include "SIInstrInfo.h" 24 #include "SIMachineFunctionInfo.h" 25 #include "SIRegisterInfo.h" 26 #include "llvm/ADT/APInt.h" 27 #include "llvm/ADT/SmallVector.h" 28 #include "llvm/ADT/StringRef.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/CodeGen/FunctionLoweringInfo.h" 31 #include "llvm/CodeGen/ISDOpcodes.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/MachineValueType.h" 35 #include "llvm/CodeGen/SelectionDAG.h" 36 #include "llvm/CodeGen/SelectionDAGISel.h" 37 #include "llvm/CodeGen/SelectionDAGNodes.h" 38 #include "llvm/CodeGen/ValueTypes.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Instruction.h" 41 #include "llvm/MC/MCInstrDesc.h" 42 #include "llvm/Support/Casting.h" 43 #include "llvm/Support/CodeGen.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/MathExtras.h" 46 #include <cassert> 47 #include <cstdint> 48 #include <new> 49 #include <vector> 50 51 using namespace llvm; 52 53 namespace llvm { 54 55 class R600InstrInfo; 56 57 } // end namespace llvm 58 59 //===----------------------------------------------------------------------===// 60 // Instruction 
// Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Numeric address-space identifiers for the current target, cached at
  // construction time.
  AMDGPUAS AMDGPUASI;

public:
  // NOTE(review): TM defaults to nullptr but is unconditionally dereferenced
  // in the initializer list and body; callers must always pass a valid
  // TargetMachine. The default exists only to satisfy INITIALIZE_PASS.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  // R600-only operand folding helpers (see R600 subclass below).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing-mode matchers.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing-mode matchers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Root,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Root,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing-mode matchers.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory read) addressing-mode matchers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs/clamp/omod) matchers.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;

  // Custom selection routines for nodes tablegen cannot handle (multiple
  // results, carry/glue chains, etc.).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

/// R600 variant of the selector; overrides the VTX/indirect addressing
/// matchers that differ from GCN.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

}  // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
244 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, 245 CodeGenOpt::Level OptLevel) { 246 return new AMDGPUDAGToDAGISel(TM, OptLevel); 247 } 248 249 /// \brief This pass converts a legalized DAG into a R600-specific 250 // DAG, ready for instruction scheduling. 251 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, 252 CodeGenOpt::Level OptLevel) { 253 return new R600DAGToDAGISel(TM, OptLevel); 254 } 255 256 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 257 Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); 258 return SelectionDAGISel::runOnMachineFunction(MF); 259 } 260 261 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { 262 if (TM.Options.NoNaNsFPMath) 263 return true; 264 265 // TODO: Move into isKnownNeverNaN 266 if (N->getFlags().isDefined()) 267 return N->getFlags().hasNoNaNs(); 268 269 return CurDAG->isKnownNeverNaN(N); 270 } 271 272 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 273 const SIInstrInfo *TII 274 = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); 275 276 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 277 return TII->isInlineConstant(C->getAPIntValue()); 278 279 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 280 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 281 282 return false; 283 } 284 285 /// \brief Determine the register class for \p OpNo 286 /// \returns The register class of the virtual register that will be used for 287 /// the given operand number \OpNo or NULL if the register class cannot be 288 /// determined. 
289 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 290 unsigned OpNo) const { 291 if (!N->isMachineOpcode()) { 292 if (N->getOpcode() == ISD::CopyToReg) { 293 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 294 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 295 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 296 return MRI.getRegClass(Reg); 297 } 298 299 const SIRegisterInfo *TRI 300 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 301 return TRI->getPhysRegClass(Reg); 302 } 303 304 return nullptr; 305 } 306 307 switch (N->getMachineOpcode()) { 308 default: { 309 const MCInstrDesc &Desc = 310 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 311 unsigned OpIdx = Desc.getNumDefs() + OpNo; 312 if (OpIdx >= Desc.getNumOperands()) 313 return nullptr; 314 int RegClass = Desc.OpInfo[OpIdx].RegClass; 315 if (RegClass == -1) 316 return nullptr; 317 318 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 319 } 320 case AMDGPU::REG_SEQUENCE: { 321 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 322 const TargetRegisterClass *SuperRC = 323 Subtarget->getRegisterInfo()->getRegClass(RCID); 324 325 SDValue SubRegOp = N->getOperand(OpNo + 1); 326 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 327 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 328 SubRegIdx); 329 } 330 } 331 } 332 333 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 334 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) 335 return N; 336 337 const SITargetLowering& Lowering = 338 *static_cast<const SITargetLowering*>(getTargetLowering()); 339 340 // Write max value to m0 before each load operation 341 342 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 343 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 344 345 SDValue Glue = M0.getValue(1); 346 347 SmallVector <SDValue, 8> Ops; 348 for 
(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 349 Ops.push_back(N->getOperand(i)); 350 } 351 Ops.push_back(Glue); 352 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 353 354 return N; 355 } 356 357 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 358 switch (NumVectorElts) { 359 case 1: 360 return AMDGPU::SReg_32_XM0RegClassID; 361 case 2: 362 return AMDGPU::SReg_64RegClassID; 363 case 4: 364 return AMDGPU::SReg_128RegClassID; 365 case 8: 366 return AMDGPU::SReg_256RegClassID; 367 case 16: 368 return AMDGPU::SReg_512RegClassID; 369 } 370 371 llvm_unreachable("invalid vector size"); 372 } 373 374 static bool getConstantValue(SDValue N, uint32_t &Out) { 375 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 376 Out = C->getAPIntValue().getZExtValue(); 377 return true; 378 } 379 380 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 381 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 382 return true; 383 } 384 385 return false; 386 } 387 388 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 389 EVT VT = N->getValueType(0); 390 unsigned NumVectorElts = VT.getVectorNumElements(); 391 EVT EltVT = VT.getVectorElementType(); 392 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 393 SDLoc DL(N); 394 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 395 396 if (NumVectorElts == 1) { 397 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 398 RegClass); 399 return; 400 } 401 402 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 403 "supported yet"); 404 // 16 = Max Num Vector Elements 405 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 406 // 1 = Vector Register Class 407 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 408 409 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 410 bool IsRegSeq = true; 411 unsigned NOps = N->getNumOperands(); 
412 for (unsigned i = 0; i < NOps; i++) { 413 // XXX: Why is this here? 414 if (isa<RegisterSDNode>(N->getOperand(i))) { 415 IsRegSeq = false; 416 break; 417 } 418 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 419 RegSeqArgs[1 + (2 * i) + 1] = 420 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 421 MVT::i32); 422 } 423 if (NOps != NumVectorElts) { 424 // Fill in the missing undef elements if this was a scalar_to_vector. 425 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 426 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 427 DL, EltVT); 428 for (unsigned i = NOps; i < NumVectorElts; ++i) { 429 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 430 RegSeqArgs[1 + (2 * i) + 1] = 431 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 432 } 433 } 434 435 if (!IsRegSeq) 436 SelectCode(N); 437 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 438 } 439 440 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 441 unsigned int Opc = N->getOpcode(); 442 if (N->isMachineOpcode()) { 443 N->setNodeId(-1); 444 return; // Already selected. 445 } 446 447 if (isa<AtomicSDNode>(N) || 448 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 449 N = glueCopyToM0(N); 450 451 switch (Opc) { 452 default: break; 453 // We are selecting i64 ADD here instead of custom lower it during 454 // DAG legalization, so we can fold some i64 ADDs used for address 455 // calculation into the LOAD and STORE instructions. 
456 case ISD::ADD: 457 case ISD::ADDC: 458 case ISD::ADDE: 459 case ISD::SUB: 460 case ISD::SUBC: 461 case ISD::SUBE: { 462 if (N->getValueType(0) != MVT::i64) 463 break; 464 465 SelectADD_SUB_I64(N); 466 return; 467 } 468 case ISD::UADDO: 469 case ISD::USUBO: { 470 SelectUADDO_USUBO(N); 471 return; 472 } 473 case AMDGPUISD::FMUL_W_CHAIN: { 474 SelectFMUL_W_CHAIN(N); 475 return; 476 } 477 case AMDGPUISD::FMA_W_CHAIN: { 478 SelectFMA_W_CHAIN(N); 479 return; 480 } 481 482 case ISD::SCALAR_TO_VECTOR: 483 case ISD::BUILD_VECTOR: { 484 EVT VT = N->getValueType(0); 485 unsigned NumVectorElts = VT.getVectorNumElements(); 486 487 if (VT == MVT::v2i16 || VT == MVT::v2f16) { 488 if (Opc == ISD::BUILD_VECTOR) { 489 uint32_t LHSVal, RHSVal; 490 if (getConstantValue(N->getOperand(0), LHSVal) && 491 getConstantValue(N->getOperand(1), RHSVal)) { 492 uint32_t K = LHSVal | (RHSVal << 16); 493 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 494 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 495 return; 496 } 497 } 498 499 break; 500 } 501 502 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 503 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 504 SelectBuildVector(N, RegClassID); 505 return; 506 } 507 case ISD::BUILD_PAIR: { 508 SDValue RC, SubReg0, SubReg1; 509 SDLoc DL(N); 510 if (N->getValueType(0) == MVT::i128) { 511 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 512 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 513 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 514 } else if (N->getValueType(0) == MVT::i64) { 515 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 516 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 517 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 518 } else { 519 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 520 } 521 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 522 N->getOperand(1), 
SubReg1 }; 523 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 524 N->getValueType(0), Ops)); 525 return; 526 } 527 528 case ISD::Constant: 529 case ISD::ConstantFP: { 530 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 531 break; 532 533 uint64_t Imm; 534 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 535 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 536 else { 537 ConstantSDNode *C = cast<ConstantSDNode>(N); 538 Imm = C->getZExtValue(); 539 } 540 541 SDLoc DL(N); 542 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 543 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 544 MVT::i32)); 545 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 546 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 547 const SDValue Ops[] = { 548 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 549 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 550 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 551 }; 552 553 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 554 N->getValueType(0), Ops)); 555 return; 556 } 557 case ISD::LOAD: 558 case ISD::STORE: { 559 N = glueCopyToM0(N); 560 break; 561 } 562 563 case AMDGPUISD::BFE_I32: 564 case AMDGPUISD::BFE_U32: { 565 // There is a scalar version available, but unlike the vector version which 566 // has a separate operand for the offset and width, the scalar version packs 567 // the width and offset into a single operand. Try to move to the scalar 568 // version if the offsets are constant, so that we can try to keep extended 569 // loads of kernel arguments in SGPRs. 570 571 // TODO: Technically we could try to pattern match scalar bitshifts of 572 // dynamic values, but it's probably not useful. 
573 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 574 if (!Offset) 575 break; 576 577 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 578 if (!Width) 579 break; 580 581 bool Signed = Opc == AMDGPUISD::BFE_I32; 582 583 uint32_t OffsetVal = Offset->getZExtValue(); 584 uint32_t WidthVal = Width->getZExtValue(); 585 586 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 587 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 588 return; 589 } 590 case AMDGPUISD::DIV_SCALE: { 591 SelectDIV_SCALE(N); 592 return; 593 } 594 case ISD::CopyToReg: { 595 const SITargetLowering& Lowering = 596 *static_cast<const SITargetLowering*>(getTargetLowering()); 597 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 598 break; 599 } 600 case ISD::AND: 601 case ISD::SRL: 602 case ISD::SRA: 603 case ISD::SIGN_EXTEND_INREG: 604 if (N->getValueType(0) != MVT::i32) 605 break; 606 607 SelectS_BFE(N); 608 return; 609 case ISD::BRCOND: 610 SelectBRCOND(N); 611 return; 612 case ISD::FMAD: 613 SelectFMAD(N); 614 return; 615 case AMDGPUISD::ATOMIC_CMP_SWAP: 616 SelectATOMIC_CMP_SWAP(N); 617 return; 618 } 619 620 SelectCode(N); 621 } 622 623 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 624 if (!N->readMem()) 625 return false; 626 if (CbId == -1) 627 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; 628 629 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 630 } 631 632 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 633 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 634 const Instruction *Term = BB->getTerminator(); 635 return Term->getMetadata("amdgpu.uniform") || 636 Term->getMetadata("structurizecfg.uniform"); 637 } 638 639 StringRef AMDGPUDAGToDAGISel::getPassName() const { 640 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 641 } 642 643 //===----------------------------------------------------------------------===// 644 // Complex Patterns 
//===----------------------------------------------------------------------===//

// \brief Match a constant address: emit it (scaled to dwords) as the
// immediate pointer operand.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

// \brief Match any non-constant address: use it as the base register with a
// zero offset. Complement of SelectGlobalValueConstantOffset above.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

// \brief VTX_READ addressing is R600-only; the GCN base class never matches.
// Overridden in R600DAGToDAGISel.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

// \brief Split \p Addr into (Base, Offset) for indirect addressing.
// Constant (or DWORDADDR-of-constant) addresses use the INDIRECT_BASE_ADDR
// pseudo-register as base; add/or-with-constant folds the constant into the
// offset; anything else becomes base + 0. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // NOTE(review): folding OR like ADD assumes the low bits are known
    // disjoint — presumably guaranteed by how these nodes are formed.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// \brief Expand a 64-bit add/sub (with or without carry in/out) into a
// 32-bit lo half, a carry-consuming 32-bit hi half, and a REG_SEQUENCE
// recombining them. Handles ADD/SUB/ADDC/SUBC/ADDE/SUBE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE read an incoming carry; ADDC/SUBC (and ADDE/SUBE) also
  // produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // Second result is the glued carry (SCC).
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Incoming carry (operand 2) feeds the low half via the carry opcode.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
                 AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

// \brief Select the chained FMA node into V_FMA_F32, filling in the source
// modifier operands from the matched inputs.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);   // chain
  Ops[9] = N->getOperand(4);   // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

// \brief Select the chained FMUL node into V_MUL_F32_e64 (same layout as
// SelectFMA_W_CHAIN minus the third source).
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);   // chain
  Ops[7] = N->getOperand(3);   // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// \brief Check that \p Offset fits the instruction's unsigned offset field
// (\p OffsetBits wide) and, on SI, that \p Base is known non-negative.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// \brief Match a DS address as base + 16-bit immediate offset, handling
// (add base, c), (sub c, x), plain constants, and the base+0 fallback.
// Always succeeds (the fallback offset is 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a 64-bit DS (read2/write2) address: a base pointer plus a pair of
// 8-bit dword offsets with Offset1 == Offset0 + 1. Always returns true; the
// fall-through default selects {Addr, 0, 1}.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // NOTE(review): this path assumes the constant offset is 4-byte aligned;
    // the pure-constant path below asserts that, this one does not — confirm.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Re-emit the negation as a real machine node for the final address.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: fold it entirely into the offsets over a
    // materialized zero base so multiple accesses can share the base register.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// MUBUF instructions encode a 12-bit unsigned immediate byte offset.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return isUInt<12>(Imm);
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isLegalMUBUFImmOffset(Imm->getZExtValue());
}

// Decompose Addr into the full MUBUF operand set: base pointer, vector
// address, scalar offset, immediate offset, and the addressing-mode / cache
// control bits. GLC/SLC are only defaulted to 0 if the caller did not already
// provide them. Fails only when the subtarget prefers flat instructions for
// global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the MUBUF addr64 addressing form. Only succeeds on generations that
// still have the addr64 bit and when SelectMUBUF chose a 64-bit VGPR address.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    // Wrap the base pointer into a full 128-bit resource descriptor.
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload without GLC/TFE outputs; forces SLC = 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// Returns true if this access is known to target the stack (a stack
// PseudoSourceValue), i.e. an argument-area access inside a call sequence.
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Split a private address into {vaddr, soffset register}.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Select a scratch (private) access in the offen form: scratch rsrc + VGPR
// address + SGPR wave offset + 12-bit immediate. Always returns true.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Materialize the bits above 4095 in the VGPR address and keep the low 12
    // bits as the immediate.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Select a scratch access whose whole address is a legal 12-bit immediate
// (no VGPR address needed). Fails otherwise.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the plain-offset MUBUF form (no vaddr, no index, no addr64),
// building a default resource descriptor around the base pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload discarding the cache-control bits.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Overload keeping only SLC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a 12-bit immediate plus an SOffset
// carrying any overflow. Fails on SI/CI when an SOffset would be needed,
// due to a hardware address-clamping bug.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Intrinsic offset operand: only constants are folded.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Intrinsic offset operand in the voffset form. Rejects constants that the
// plain-offset form can handle; always succeeds otherwise.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Select a FLAT address, folding a constant offset into the instruction when
// the subtarget supports flat offsets (13-bit signed / 12-bit unsigned).
// Always returns true.
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

// Unsigned-offset flat atomic addressing.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

// Signed-offset flat atomic addressing.
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

// Encode an SMRD byte offset. Imm reports whether the result is a true
// immediate (vs. a value moved into an SGPR / a CI 32-bit literal).
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  // NOTE(review): the isUInt<32>(EncodedOffset) re-check below is redundant
  // with the guard above; kept for fidelity.
  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Split an SMRD address into SGPR base + encoded offset. Always succeeds via
// the {Addr, 0} fall-through.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with a true immediate offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a CI-only 32-bit literal offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with the offset in an SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Buffer-load variants of the three SMRD offset forms above.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Split a MOVREL index into base + constant offset. Rejects pure constants
// (those should fold elsewhere); otherwise always succeeds.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // sra selects the signed form, srl the unsigned one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  // No BFE pattern matched; fall back to table-generated selection.
  SelectCode(N);
}

// Match shift/mask idioms into S_BFE bitfield extracts; falls back to
// SelectCode when no pattern applies.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Returns true if this BRCOND can be selected as a scalar S_CBRANCH_SCC*
// branch (single-use setcc on i32, or on i64 eq/ne when the subtarget has
// 64-bit scalar compares).
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Lower BRCOND: undef condition -> SI_BR_UNDEF, scalar condition -> tablegen
// S_CBRANCH_SCC*, otherwise copy the condition into VCC and branch on it.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// Select f32 fmad as V_MAD_MIX_F32 when at least one source is an f16
// conversion; otherwise defer to the normal patterns.
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Carry over the memory operand from the original node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the old {data, cmp} pair; extract just the data
  // half the original node produced.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Peel fneg/fabs off In into NEG/ABS source-modifier bits. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

// As SelectVOP3Mods, but only matches when the stripped source is known
// not to be a NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources with no fneg/fabs modifier.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// SelectVOP3Mods plus zeroed clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Pass the source through unmodified with zeroed clamp/omod.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
// Strip a 32->16-bit truncate (and any bitcasts) so the underlying 32-bit
// value can be compared against the other half of a packed operand.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

// Compute VOP3P source modifiers (NEG/NEG_HI/OP_SEL) for a packed operand,
// folding per-half fneg and hi-half extracts of a BUILD_VECTOR. Always
// returns true.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    // A whole-vector fneg negates both halves.
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the mods so far in case the per-element folding below fails.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    // Could not reduce to a scalar; discard the per-element mods.
    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// SelectVOP3PMods plus a zeroed clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

// op_sel matching is not implemented yet; pass sources through with zero mods.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
/// Match a mad_mix-style operand: an optional neg/abs (collected by
/// SelectVOP3ModsImpl) around an f16->f32 fp_extend. \p Mods receives the
/// SISrcMods bits either way; returns true only when the fp16 conversion form
/// was actually matched.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16.
    // If the source's op_sel is set, it picks the high half of the source
    // register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src))
      Mods |= SISrcMods::OP_SEL_0;

    return true;
  }

  return false;
}

/// Run target post-isel peepholes (PostISelFolding) over every selected
/// machine node, repeating until the DAG reaches a fixed point.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        // Folding produced a replacement node; rewire users and iterate
        // again, since the new node may enable further folds.
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

/// R600 instruction-selection entry point. Vector builds are selected by hand
/// so the register class can be chosen from the element count; everything
/// else is handed to the generated matcher via SelectCode.
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

/// Match an address for R600 indirect addressing. Always succeeds, producing
/// (Base, Offset) in one of these forms:
///   constant C            -> (INDIRECT_BASE_ADDR, C)
///   DWORDADDR(constant C) -> (INDIRECT_BASE_ADDR, C)
///   add/or x, constant C  -> (x, C)
///   anything else         -> (Addr, 0)
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // NOTE(review): OR is folded like ADD here — presumably the operands are
    // known to have disjoint bits (base | aligned-offset); confirm.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// Match a vertex-fetch (VTX_READ) address. Always succeeds: folds a 16-bit
/// immediate added to a base, or a wholly-constant address (with the ZERO
/// register as base), into the offset field; otherwise uses a zero offset.
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  // NOTE(review): isInt<16> applied to getZExtValue() rejects any negative
  // constant (its zero-extension exceeds the signed 16-bit range), so only
  // offsets in [0, 32767] are folded — presumably intentional; confirm.
  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}