//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel) {}
  ~AMDGPUDAGToDAGISel() override = default;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  SDValue foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
    return BO->Flags.hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
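/// (Illustrative note: for machine nodes this maps \p OpNo past the
/// instruction's defs into the MCInstrDesc operand list and returns the
/// register class recorded there.)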
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return N;

  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  // Write the max value to m0 before each load operation.
  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
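  // (Illustrative sketch: for a pattern like (load (add base, const)),
  // keeping the ADD visible here lets the MUBUF/SMRD complex patterns below
  // fold the constant into the instruction's offset field instead of
  // emitting a separate 64-bit add.)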
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                           N->getOperand(0), RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
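      // (Illustrative example: a v4i32 scalar_to_vector has a single operand,
      // so sub-registers 1..3 of the REG_SEQUENCE below are filled with
      // IMPLICIT_DEF values.)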
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef =
          CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                      MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            DL, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has separate operands for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
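    // (Illustrative example: (BFE_U32 x, 8, 16) becomes (S_BFE_U32 x,
    // 0x100008); see getS_BFE below for the operand packing.)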
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use.
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
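  // (SignBitIsZero is a conservative check: the fold is only performed when
  // the base is provably non-negative.)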
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can
        // check the known bits in isDSOffsetLegal. We need to emit the
        // selected node here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // Default case.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If the offset is too big, put the low 16 bits into the offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can
        // check the known bits in isDSOffsetLegal. We need to emit the
        // selected node here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // Default case.

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // Default case -> offset.
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  if (auto FI = dyn_cast<FrameIndexSDNode>(N))
    return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
  return N;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
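    // (Hence only the unsigned 12-bit immediate case is folded below;
    // isLegalMUBUFImmOffset checks isUInt<12>.)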
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64.
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32.
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
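  // (Worked example with illustrative values: Imm = 5000 takes the second
  // branch above and splits into Imm = 905 and Overflow = 4095; the two
  // parts still sum to 5000 while SOffset keeps all low bits set.)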
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
                                    SDValue &VAddr,
                                    SDValue &SLC,
                                    SDValue &TFE) const {
  VAddr = Addr;
  TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
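  // (Illustrative example: Offset = 16 and Width = 8 pack to 0x00080010.)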
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c" ---> "BFE_U32 a, (c-b), (32-c)"
  // "(a << b) sra c" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
                               AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
                               AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod.
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod.
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
      *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}