//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel) {}
  ~AMDGPUDAGToDAGISel() override = default;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  SDValue foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
    return BO->Flags.hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N),
                                                         MVT::i32));
          return;
        }
      }

      break;
    }

    assert(EltVT.bitsEq(MVT::i32));

    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
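      // If any operand is already a RegisterSDNode, give up on forming a
      // REG_SEQUENCE and let generic selection (SelectCode below) handle
      // the node instead.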
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                      MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            DL, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand.
    // Try to move to the scalar version if the offsets are constant, so that
    // we can try to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
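
  // S_ADD_U32/S_SUB_U32 define the carry in SCC; the *C variants below also
  // consume it, so the low and high halves can be chained through glue.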
  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2,
  // clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
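  // Only allow the fold when the sign bit of the base is known to be zero.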
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal.
        // We need to emit the selected node here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
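      // The constant doesn't fit the 12-bit immediate field, so materialize
      // it in an SGPR with s_mov_b32 and pass it through soffset instead.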
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  if (auto FI = dyn_cast<FrameIndexSDNode>(N))
    return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
  return N;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
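    // The unsigned 12-bit range check below also rejects negative constants,
    // since their zero-extended values fall outside the legal range.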
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32.
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
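  // Give up on the fold entirely on those generations whenever a nonzero
  // SOffset would be required.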
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
                                    SDValue &VAddr,
                                    SDValue &SLC,
                                    SDValue &TFE) const {
  VAddr = Addr;
  TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function: pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32 instructions. In the
  // second source, bits [5:0] contain the offset and bits [22:16] the width.
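  // For example, Offset = 16 and Width = 8 pack to (16 | (8 << 16)),
  // i.e. 0x80010.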
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
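  // S_CBRANCH_SCC* only applies when the condition is a single-use SETCC
  // whose result reaches the branch, possibly through a CopyToReg.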
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
          AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
          AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);
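
  // The RTN atomic returns the old memory value in the low half of the
  // src/cmp register pair, so extract the subregister matching the result
  // type.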
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
                cast<ConstantSDNode>(Clamp)->isNullValue() &&
                cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  // FIXME: Look for FNEG on separate components.
  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  // Packed instructions do not have abs modifiers.

  // FIXME: Handle abs/neg of individual components.
  // FIXME: Handle swizzling with op_sel
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}