1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUInstrInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUISelLowering.h" // For AMDGPUISD 19 #include "AMDGPUSubtarget.h" 20 #include "SIDefines.h" 21 #include "SIInstrInfo.h" 22 #include "SIRegisterInfo.h" 23 #include "SIISelLowering.h" 24 #include "SIMachineFunctionInfo.h" 25 #include "llvm/ADT/APInt.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/MachineValueType.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Instruction.h" 40 #include "llvm/MC/MCInstrDesc.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CodeGen.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include <cassert> 46 #include <cstdint> 47 #include <new> 48 #include <vector> 49 50 using namespace llvm; 51 52 namespace llvm { 53 54 class R600InstrInfo; 55 56 } // end namespace llvm 57 58 //===----------------------------------------------------------------------===// 59 // Instruction Selector Implementation 60 
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel){
    AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
  }
  ~AMDGPUDAGToDAGISel() override = default;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  SDValue foldFrameIndex(SDValue N) const;
  // Source-modifier / immediate classification helpers.
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  // R600-family operand folding helpers (take the R600 instruction info).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  // Complex-pattern matchers referenced from the generated matcher table.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  // DS (local memory) addressing-mode matchers.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base,
                            SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode matchers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  // SMRD (scalar memory read) addressing-mode matchers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs/clamp/omod) matchers.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  // Custom selection for nodes tablegen cannot match (multiple results,
  // glue, carry chains).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before delegating to the common
  // SelectionDAG-based selector.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \returns true if \p N is known not to produce a NaN: either the whole
/// module is compiled with -no-nans-fp-math, the node itself carries the
/// no-NaNs fast-math flag, or the DAG can prove the value is never NaN.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
    return BO->Flags.hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \returns true if the integer or FP constant \p N can be encoded as an
/// inline immediate operand (per SIInstrInfo); false for non-constant nodes.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      // For CopyToReg, derive the class from the destination register itself.
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: look up the operand's class in the
    // instruction description (use operands follow the defs).
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operands alternate (value, subreg index); constrain the
    // super class by the subregister index that follows the queried operand.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

/// For a memory node \p N accessing the local address space on SI and newer
/// targets, emit a copy of -1 into m0 and append its glue result to \p N's
/// operands (via MorphNodeTo). Returns \p N unchanged for other targets or
/// address spaces.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

/// Map a vector element count to the SGPR register class ID wide enough to
/// hold it (32 bits per element).
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

/// If \p N is an integer or FP constant, store its (bitcast) zero-extended
/// value in \p Out and return true; return false otherwise.
static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

/// Main instruction-selection entry point: handles the opcodes that need
/// custom selection here, then falls back to the tablegen-generated
/// SelectCode for everything else.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics (and the target inc/dec nodes) may touch local memory, so they
  // get the m0 glue treatment up front.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          // Two constant halves pack into one 32-bit scalar move.
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N),
                                                         MVT::i32));
          return;
        }
      }

      break;
    }

    assert(EltVT.bitsEq(MVT::i32));

    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                      MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    // Pair two values into one wider register via REG_SEQUENCE.
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit constants that can't be inline immediates are materialized as
    // two 32-bit S_MOV_B32s combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

/// \returns true if \p N is a load from the constant address space
/// (when \p CbId is -1) or from constant buffer number \p CbId.
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

/// \returns true if the current block's IR terminator was tagged as uniform
/// by earlier analysis/structurization passes (via metadata).
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    // The byte address is converted to a dword offset (divide by 4) here.
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  // Non-constant address: use it directly as the base with a zero offset.
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

/// Match a VTX read address as base + signed 16-bit immediate offset.
/// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

/// Match an indirect address as (base register, constant offset).
/// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// Expand a 64-bit add/sub (with or without carry) into 32-bit
/// S_ADD_U32/S_ADDC_U32 (or the SUB equivalents) over the low and high
/// halves, recombining the results with a REG_SEQUENCE and rewiring the
/// carry output when the original node produced one.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // The high half always consumes the carry (glue) produced by the low half.
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

/// Select the chained FMA node to V_FMA_F32, matching VOP3 source modifiers
/// for all three sources. Operand 0 is the chain, operand 4 the final input.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

/// Select the chained FMUL node to V_MUL_F32_e64, matching VOP3 source
/// modifiers for both sources.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

/// \returns true if \p Offset fits in the \p OffsetBits-wide unsigned DS
/// offset field and (on SI, unless unsafe folding is enabled) the base is
/// provably non-negative.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// Match a DS address as base + unsigned 16-bit byte offset.
/// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
/// Match a 64-bit, 4-byte-aligned DS access as base + two 8-bit dword
/// offsets, where Offset1 == Offset0 + 1 (the two halves of the access).
/// Always succeeds; the fallback is (Addr, 0, 1).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Byte offset scaled down to dwords for the 8-bit offset fields.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      // Constant address: share a zero base register and carry the whole
      // address in the offset fields.
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

/// \returns true if \p Imm fits in the 12-bit unsigned MUBUF immediate
/// offset field.
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

/// Match a MUBUF address, initializing the addressing-mode flag operands
/// (offen/idxen/addr64, GLC/SLC/TFE defaults) and splitting the address into
/// pointer, vaddr and offset parts. Fails when the subtarget prefers flat
/// instructions for global accesses.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
1008 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1009 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1010 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 1011 0); 1012 return true; 1013 } 1014 } 1015 1016 if (Addr.getOpcode() == ISD::ADD) { 1017 // (add N0, N1) -> addr64 1018 SDValue N0 = Addr.getOperand(0); 1019 SDValue N1 = Addr.getOperand(1); 1020 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1021 Ptr = N0; 1022 VAddr = N1; 1023 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1024 return true; 1025 } 1026 1027 // default case -> offset 1028 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1029 Ptr = Addr; 1030 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1031 1032 return true; 1033 } 1034 1035 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1036 SDValue &VAddr, SDValue &SOffset, 1037 SDValue &Offset, SDValue &GLC, 1038 SDValue &SLC, SDValue &TFE) const { 1039 SDValue Ptr, Offen, Idxen, Addr64; 1040 1041 // addr64 bit was removed for volcanic islands. 
1042 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 1043 return false; 1044 1045 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1046 GLC, SLC, TFE)) 1047 return false; 1048 1049 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1050 if (C->getSExtValue()) { 1051 SDLoc DL(Addr); 1052 1053 const SITargetLowering& Lowering = 1054 *static_cast<const SITargetLowering*>(getTargetLowering()); 1055 1056 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1057 return true; 1058 } 1059 1060 return false; 1061 } 1062 1063 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1064 SDValue &VAddr, SDValue &SOffset, 1065 SDValue &Offset, 1066 SDValue &SLC) const { 1067 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1068 SDValue GLC, TFE; 1069 1070 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1071 } 1072 1073 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { 1074 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) 1075 return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); 1076 return N; 1077 } 1078 1079 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 1080 SDValue &VAddr, SDValue &SOffset, 1081 SDValue &ImmOffset) const { 1082 1083 SDLoc DL(Addr); 1084 MachineFunction &MF = CurDAG->getMachineFunction(); 1085 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1086 1087 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 1088 SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); 1089 1090 // (add n0, c1) 1091 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1092 SDValue N0 = Addr.getOperand(0); 1093 SDValue N1 = Addr.getOperand(1); 1094 1095 // Offsets in vaddr must be positive. 
1096 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1097 if (isLegalMUBUFImmOffset(C1)) { 1098 VAddr = foldFrameIndex(N0); 1099 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1100 return true; 1101 } 1102 } 1103 1104 // (node) 1105 VAddr = foldFrameIndex(Addr); 1106 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1107 return true; 1108 } 1109 1110 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1111 SDValue &SOffset, SDValue &Offset, 1112 SDValue &GLC, SDValue &SLC, 1113 SDValue &TFE) const { 1114 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1115 const SIInstrInfo *TII = 1116 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1117 1118 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1119 GLC, SLC, TFE)) 1120 return false; 1121 1122 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1123 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1124 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1125 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1126 APInt::getAllOnesValue(32).getZExtValue(); // Size 1127 SDLoc DL(Addr); 1128 1129 const SITargetLowering& Lowering = 1130 *static_cast<const SITargetLowering*>(getTargetLowering()); 1131 1132 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1133 return true; 1134 } 1135 return false; 1136 } 1137 1138 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1139 SDValue &Soffset, SDValue &Offset 1140 ) const { 1141 SDValue GLC, SLC, TFE; 1142 1143 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1144 } 1145 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1146 SDValue &Soffset, SDValue &Offset, 1147 SDValue &SLC) const { 1148 SDValue GLC, TFE; 1149 1150 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1151 } 1152 1153 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, 1154 SDValue &SOffset, 1155 SDValue &ImmOffset) const 
{ 1156 SDLoc DL(Constant); 1157 uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); 1158 uint32_t Overflow = 0; 1159 1160 if (Imm >= 4096) { 1161 if (Imm <= 4095 + 64) { 1162 // Use an SOffset inline constant for 1..64 1163 Overflow = Imm - 4095; 1164 Imm = 4095; 1165 } else { 1166 // Try to keep the same value in SOffset for adjacent loads, so that 1167 // the corresponding register contents can be re-used. 1168 // 1169 // Load values with all low-bits set into SOffset, so that a larger 1170 // range of values can be covered using s_movk_i32 1171 uint32_t High = (Imm + 1) & ~4095; 1172 uint32_t Low = (Imm + 1) & 4095; 1173 Imm = Low; 1174 Overflow = High - 1; 1175 } 1176 } 1177 1178 // There is a hardware bug in SI and CI which prevents address clamping in 1179 // MUBUF instructions from working correctly with SOffsets. The immediate 1180 // offset is unaffected. 1181 if (Overflow > 0 && 1182 Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 1183 return false; 1184 1185 ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); 1186 1187 if (Overflow <= 64) 1188 SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); 1189 else 1190 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1191 CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), 1192 0); 1193 1194 return true; 1195 } 1196 1197 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, 1198 SDValue &SOffset, 1199 SDValue &ImmOffset) const { 1200 SDLoc DL(Offset); 1201 1202 if (!isa<ConstantSDNode>(Offset)) 1203 return false; 1204 1205 return SelectMUBUFConstant(Offset, SOffset, ImmOffset); 1206 } 1207 1208 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, 1209 SDValue &SOffset, 1210 SDValue &ImmOffset, 1211 SDValue &VOffset) const { 1212 SDLoc DL(Offset); 1213 1214 // Don't generate an unnecessary voffset for constant offsets. 
1215 if (isa<ConstantSDNode>(Offset)) { 1216 SDValue Tmp1, Tmp2; 1217 1218 // When necessary, use a voffset in <= CI anyway to work around a hardware 1219 // bug. 1220 if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS || 1221 SelectMUBUFConstant(Offset, Tmp1, Tmp2)) 1222 return false; 1223 } 1224 1225 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1226 SDValue N0 = Offset.getOperand(0); 1227 SDValue N1 = Offset.getOperand(1); 1228 if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 && 1229 SelectMUBUFConstant(N1, SOffset, ImmOffset)) { 1230 VOffset = N0; 1231 return true; 1232 } 1233 } 1234 1235 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1236 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1237 VOffset = Offset; 1238 1239 return true; 1240 } 1241 1242 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, 1243 SDValue &VAddr, 1244 SDValue &SLC, 1245 SDValue &TFE) const { 1246 VAddr = Addr; 1247 TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1248 return true; 1249 } 1250 1251 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1252 SDValue &Offset, bool &Imm) const { 1253 1254 // FIXME: Handle non-constant offsets. 1255 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1256 if (!C) 1257 return false; 1258 1259 SDLoc SL(ByteOffsetNode); 1260 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1261 int64_t ByteOffset = C->getSExtValue(); 1262 int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); 1263 1264 if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { 1265 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1266 Imm = true; 1267 return true; 1268 } 1269 1270 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1271 return false; 1272 1273 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1274 // 32-bit Immediates are supported on Sea Islands. 
1275 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1276 } else { 1277 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1278 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1279 C32Bit), 0); 1280 } 1281 Imm = false; 1282 return true; 1283 } 1284 1285 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1286 SDValue &Offset, bool &Imm) const { 1287 SDLoc SL(Addr); 1288 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1289 SDValue N0 = Addr.getOperand(0); 1290 SDValue N1 = Addr.getOperand(1); 1291 1292 if (SelectSMRDOffset(N1, Offset, Imm)) { 1293 SBase = N0; 1294 return true; 1295 } 1296 } 1297 SBase = Addr; 1298 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1299 Imm = true; 1300 return true; 1301 } 1302 1303 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1304 SDValue &Offset) const { 1305 bool Imm; 1306 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1307 } 1308 1309 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1310 SDValue &Offset) const { 1311 1312 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1313 return false; 1314 1315 bool Imm; 1316 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1317 return false; 1318 1319 return !Imm && isa<ConstantSDNode>(Offset); 1320 } 1321 1322 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1323 SDValue &Offset) const { 1324 bool Imm; 1325 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1326 !isa<ConstantSDNode>(Offset); 1327 } 1328 1329 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1330 SDValue &Offset) const { 1331 bool Imm; 1332 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1333 } 1334 1335 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1336 SDValue &Offset) const { 1337 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1338 return false; 1339 1340 bool Imm; 1341 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1342 
return false; 1343 1344 return !Imm && isa<ConstantSDNode>(Offset); 1345 } 1346 1347 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1348 SDValue &Offset) const { 1349 bool Imm; 1350 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1351 !isa<ConstantSDNode>(Offset); 1352 } 1353 1354 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, 1355 SDValue &Base, 1356 SDValue &Offset) const { 1357 SDLoc DL(Index); 1358 1359 if (CurDAG->isBaseWithConstantOffset(Index)) { 1360 SDValue N0 = Index.getOperand(0); 1361 SDValue N1 = Index.getOperand(1); 1362 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1363 1364 // (add n0, c0) 1365 Base = N0; 1366 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); 1367 return true; 1368 } 1369 1370 if (isa<ConstantSDNode>(Index)) 1371 return false; 1372 1373 Base = Index; 1374 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1375 return true; 1376 } 1377 1378 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1379 SDValue Val, uint32_t Offset, 1380 uint32_t Width) { 1381 // Transformation function, pack the offset and width of a BFE into 1382 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1383 // source, bits [5:0] contain the offset and bits [22:16] the width. 
1384 uint32_t PackedVal = Offset | (Width << 16); 1385 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1386 1387 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1388 } 1389 1390 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1391 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1392 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1393 // Predicate: 0 < b <= c < 32 1394 1395 const SDValue &Shl = N->getOperand(0); 1396 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1397 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1398 1399 if (B && C) { 1400 uint32_t BVal = B->getZExtValue(); 1401 uint32_t CVal = C->getZExtValue(); 1402 1403 if (0 < BVal && BVal <= CVal && CVal < 32) { 1404 bool Signed = N->getOpcode() == ISD::SRA; 1405 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1406 1407 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, 1408 32 - CVal)); 1409 return; 1410 } 1411 } 1412 SelectCode(N); 1413 } 1414 1415 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1416 switch (N->getOpcode()) { 1417 case ISD::AND: 1418 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1419 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1420 // Predicate: isMask(mask) 1421 const SDValue &Srl = N->getOperand(0); 1422 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1423 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1424 1425 if (Shift && Mask) { 1426 uint32_t ShiftVal = Shift->getZExtValue(); 1427 uint32_t MaskVal = Mask->getZExtValue(); 1428 1429 if (isMask_32(MaskVal)) { 1430 uint32_t WidthVal = countPopulation(MaskVal); 1431 1432 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1433 Srl.getOperand(0), ShiftVal, WidthVal)); 1434 return; 1435 } 1436 } 1437 } 1438 break; 1439 case ISD::SRL: 1440 if (N->getOperand(0).getOpcode() == ISD::AND) { 1441 // "(a & mask) srl b)" ---> "BFE_U32 a, b, 
popcount(mask >> b)" 1442 // Predicate: isMask(mask >> b) 1443 const SDValue &And = N->getOperand(0); 1444 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1445 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1446 1447 if (Shift && Mask) { 1448 uint32_t ShiftVal = Shift->getZExtValue(); 1449 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1450 1451 if (isMask_32(MaskVal)) { 1452 uint32_t WidthVal = countPopulation(MaskVal); 1453 1454 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1455 And.getOperand(0), ShiftVal, WidthVal)); 1456 return; 1457 } 1458 } 1459 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { 1460 SelectS_BFEFromShifts(N); 1461 return; 1462 } 1463 break; 1464 case ISD::SRA: 1465 if (N->getOperand(0).getOpcode() == ISD::SHL) { 1466 SelectS_BFEFromShifts(N); 1467 return; 1468 } 1469 break; 1470 1471 case ISD::SIGN_EXTEND_INREG: { 1472 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 1473 SDValue Src = N->getOperand(0); 1474 if (Src.getOpcode() != ISD::SRL) 1475 break; 1476 1477 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); 1478 if (!Amt) 1479 break; 1480 1481 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1482 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), 1483 Amt->getZExtValue(), Width)); 1484 return; 1485 } 1486 } 1487 1488 SelectCode(N); 1489 } 1490 1491 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { 1492 assert(N->getOpcode() == ISD::BRCOND); 1493 if (!N->hasOneUse()) 1494 return false; 1495 1496 SDValue Cond = N->getOperand(1); 1497 if (Cond.getOpcode() == ISD::CopyToReg) 1498 Cond = Cond.getOperand(2); 1499 1500 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 1501 return false; 1502 1503 MVT VT = Cond.getOperand(0).getSimpleValueType(); 1504 if (VT == MVT::i32) 1505 return true; 1506 1507 if (VT == MVT::i64) { 1508 auto ST = static_cast<const SISubtarget *>(Subtarget); 1509 1510 
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 1511 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); 1512 } 1513 1514 return false; 1515 } 1516 1517 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { 1518 SDValue Cond = N->getOperand(1); 1519 1520 if (Cond.isUndef()) { 1521 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, 1522 N->getOperand(2), N->getOperand(0)); 1523 return; 1524 } 1525 1526 if (isCBranchSCC(N)) { 1527 // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. 1528 SelectCode(N); 1529 return; 1530 } 1531 1532 SDLoc SL(N); 1533 1534 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond); 1535 CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, 1536 N->getOperand(2), // Basic Block 1537 VCC.getValue(0)); 1538 } 1539 1540 // This is here because there isn't a way to use the generated sub0_sub1 as the 1541 // subreg index to EXTRACT_SUBREG in tablegen. 1542 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { 1543 MemSDNode *Mem = cast<MemSDNode>(N); 1544 unsigned AS = Mem->getAddressSpace(); 1545 if (AS == AMDGPUASI.FLAT_ADDRESS) { 1546 SelectCode(N); 1547 return; 1548 } 1549 1550 MVT VT = N->getSimpleValueType(0); 1551 bool Is32 = (VT == MVT::i32); 1552 SDLoc SL(N); 1553 1554 MachineSDNode *CmpSwap = nullptr; 1555 if (Subtarget->hasAddr64()) { 1556 SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC; 1557 1558 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { 1559 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 : 1560 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64; 1561 SDValue CmpVal = Mem->getOperand(2); 1562 1563 // XXX - Do we care about glue operands? 
1564 1565 SDValue Ops[] = { 1566 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1567 }; 1568 1569 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1570 } 1571 } 1572 1573 if (!CmpSwap) { 1574 SDValue SRsrc, SOffset, Offset, SLC; 1575 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { 1576 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET : 1577 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET; 1578 1579 SDValue CmpVal = Mem->getOperand(2); 1580 SDValue Ops[] = { 1581 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1582 }; 1583 1584 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1585 } 1586 } 1587 1588 if (!CmpSwap) { 1589 SelectCode(N); 1590 return; 1591 } 1592 1593 MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1); 1594 *MMOs = Mem->getMemOperand(); 1595 CmpSwap->setMemRefs(MMOs, MMOs + 1); 1596 1597 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; 1598 SDValue Extract 1599 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); 1600 1601 ReplaceUses(SDValue(N, 0), Extract); 1602 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); 1603 CurDAG->RemoveDeadNode(N); 1604 } 1605 1606 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1607 SDValue &SrcMods) const { 1608 unsigned Mods = 0; 1609 Src = In; 1610 1611 if (Src.getOpcode() == ISD::FNEG) { 1612 Mods |= SISrcMods::NEG; 1613 Src = Src.getOperand(0); 1614 } 1615 1616 if (Src.getOpcode() == ISD::FABS) { 1617 Mods |= SISrcMods::ABS; 1618 Src = Src.getOperand(0); 1619 } 1620 1621 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1622 return true; 1623 } 1624 1625 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, 1626 SDValue &SrcMods) const { 1627 SelectVOP3Mods(In, Src, SrcMods); 1628 return isNoNanSrc(Src); 1629 } 1630 1631 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1632 SDValue &SrcMods) const { 1633 bool Res = 
SelectVOP3Mods(In, Src, SrcMods); 1634 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1635 } 1636 1637 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1638 SDValue &SrcMods, SDValue &Clamp, 1639 SDValue &Omod) const { 1640 SDLoc DL(In); 1641 // FIXME: Handle Clamp and Omod 1642 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1643 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1644 1645 return SelectVOP3Mods(In, Src, SrcMods); 1646 } 1647 1648 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1649 SDValue &SrcMods, SDValue &Clamp, 1650 SDValue &Omod) const { 1651 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1652 1653 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1654 cast<ConstantSDNode>(Clamp)->isNullValue() && 1655 cast<ConstantSDNode>(Omod)->isNullValue(); 1656 } 1657 1658 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1659 SDValue &SrcMods, 1660 SDValue &Omod) const { 1661 // FIXME: Handle Omod 1662 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1663 1664 return SelectVOP3Mods(In, Src, SrcMods); 1665 } 1666 1667 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1668 SDValue &SrcMods, 1669 SDValue &Clamp, 1670 SDValue &Omod) const { 1671 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1672 return SelectVOP3Mods(In, Src, SrcMods); 1673 } 1674 1675 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, 1676 SDValue &Clamp, SDValue &Omod) const { 1677 Src = In; 1678 1679 SDLoc DL(In); 1680 // FIXME: Handle Clamp and Omod 1681 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1682 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1683 1684 return true; 1685 } 1686 1687 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, 1688 SDValue &SrcMods) const { 1689 unsigned Mods = 0; 1690 Src = In; 1691 1692 // FIXME: Look for on separate components 1693 if (Src.getOpcode() == 
ISD::FNEG) { 1694 Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI); 1695 Src = Src.getOperand(0); 1696 } 1697 1698 // Packed instructions do not have abs modifiers. 1699 1700 // FIXME: Handle abs/neg of individual components. 1701 // FIXME: Handle swizzling with op_sel 1702 Mods |= SISrcMods::OP_SEL_1; 1703 1704 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1705 return true; 1706 } 1707 1708 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, 1709 SDValue &SrcMods, 1710 SDValue &Clamp) const { 1711 SDLoc SL(In); 1712 1713 // FIXME: Handle clamp and op_sel 1714 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1715 1716 return SelectVOP3PMods(In, Src, SrcMods); 1717 } 1718 1719 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1720 const AMDGPUTargetLowering& Lowering = 1721 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1722 bool IsModified = false; 1723 do { 1724 IsModified = false; 1725 // Go over all selected nodes and try to fold them a bit more 1726 for (SDNode &Node : CurDAG->allnodes()) { 1727 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1728 if (!MachineNode) 1729 continue; 1730 1731 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1732 if (ResNode != &Node) { 1733 ReplaceUses(&Node, ResNode); 1734 IsModified = true; 1735 } 1736 } 1737 CurDAG->RemoveDeadNodes(); 1738 } while (IsModified); 1739 } 1740