1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUInstrInfo.h" 17 #include "AMDGPURegisterInfo.h" 18 #include "AMDGPUISelLowering.h" // For AMDGPUISD 19 #include "AMDGPUSubtarget.h" 20 #include "SIDefines.h" 21 #include "SIInstrInfo.h" 22 #include "SIRegisterInfo.h" 23 #include "SIISelLowering.h" 24 #include "SIMachineFunctionInfo.h" 25 #include "llvm/ADT/APInt.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/MachineValueType.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Instruction.h" 40 #include "llvm/MC/MCInstrDesc.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CodeGen.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include <cassert> 46 #include <cstdint> 47 #include <new> 48 #include <vector> 49 50 using namespace llvm; 51 52 namespace llvm { 53 54 class R600InstrInfo; 55 56 } // end namespace llvm 57 58 //===----------------------------------------------------------------------===// 59 // Instruction Selector Implementation 60 
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space mapping for the current target (queried in ctor).
  AMDGPUAS AMDGPUASI;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel){
    AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
  }
  ~AMDGPUDAGToDAGISel() override = default;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  // R600-family operand folding helpers (R600InstrInfo is only
  // forward-declared here; the definitions live elsewhere).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern selectors referenced from the generated matcher
  // (AMDGPUGenDAGISel.inc, included below). Each returns true on a
  // successful match and fills in the by-reference operand pieces.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local/LDS memory) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Root,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Root,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing.
  bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs, clamp, omod) matching.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  // Custom selection for nodes tablegen cannot match (multiple results,
  // carry chains, etc.). Each replaces \p N in the DAG.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Refresh the cached subtarget for the function being selected.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \brief Returns true if \p N can be assumed to never be a NaN source:
/// either no-NaNs FP math is globally enabled, the node carries the
/// no-NaNs fast-math flag, or the DAG can prove the value is never NaN.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \brief Returns true if \p N is a constant (integer or FP, compared via
/// its bit pattern) that the SI instruction encoding can hold inline.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    // For a not-yet-selected CopyToReg we can still answer from the
    // destination register: virtual registers have their class in MRI,
    // physical registers are classified by the target register info.
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Ordinary machine node: look the operand up in the MCInstrDesc.
    // OpNo is relative to the uses, so skip over the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operands come in (value, subreg-index) pairs after the
    // register-class id in operand 0; constrain the super class by the
    // subregister index that follows the requested value operand.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

/// \brief For local-memory accesses on SI+, append a glued copy of -1 into m0
/// to \p N's operand list so the M0 write is scheduled with the access.
/// Returns \p N (morphed in place) — other nodes are returned unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Re-create the operand list with the glue value appended, then morph the
  // node in place so all existing uses of N stay valid.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

/// \brief Map a vector element count to the SGPR register class wide enough
/// to hold it (each element is 32 bits — see the bitsEq assert in Select).
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

/// \brief If \p N is a constant (integer or FP), return its low 32 bits of
/// raw bit pattern in \p Out and return true.
static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

/// \brief Main selection hook: handles the node kinds that need custom
/// AMDGPU selection and defers everything else to the generated matcher
/// via SelectCode.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Local-memory atomics need the glued m0 write (see glueCopyToM0).
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          // Pack the two 16-bit constants into one 32-bit immediate move.
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(EltVT.bitsEq(MVT::i32));

    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Select a BUILD_PAIR of two 32/64-bit halves directly into a
    // REG_SEQUENCE over the matching SGPR class.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize a 64-bit constant that has no inline encoding as two
    // 32-bit S_MOV_B32s combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Attach the m0 glue if this is a local-memory access, then fall
    // through to the generated matcher.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

/// \brief Returns true if \p N is a load from the constant address space;
/// with \p CbId >= 0, from that specific constant buffer instead.
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

/// \brief Returns true if the current block's terminator was marked uniform
/// by an earlier analysis (amdgpu.uniform / structurizecfg.uniform metadata).
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

/// \brief Match a constant address: emit it (scaled to dwords) as IntPtr.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

/// \brief Match any non-constant address as (BaseReg, offset 0).
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

/// \brief Match a VTX read address as base + signed 16-bit immediate offset.
/// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

/// \brief Match an indirect address as base + constant offset. Constant and
/// DWORDADDR forms use the INDIRECT_BASE_ADDR pseudo-register as the base.
/// Always succeeds; the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// \brief Expand a 64-bit add/sub (with or without carry) into a 32-bit
/// S_ADD/S_SUB plus carry-chained S_ADDC/S_SUBB over the sub0/sub1 halves,
/// recombined with a REG_SEQUENCE. Carry-out users are rewired to the high
/// half's glue result.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: use the carry-in variant only for ADDE/SUBE.
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

/// \brief Select FMA_W_CHAIN into V_FMA_F32, matching VOP3 source modifiers
/// for each of the three sources and threading the chain through.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);  // chain
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

/// \brief Select FMUL_W_CHAIN into V_MUL_F32_e64, analogous to
/// SelectFMA_W_CHAIN but with two sources.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);  // chain
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

/// \brief Returns true if \p Offset fits the \p OffsetBits-wide unsigned
/// immediate field for a DS instruction with base \p Base. Pre-CI targets
/// additionally require a provably non-negative base (see comment below).
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// \brief Match a single-address DS operand as base + 16-bit offset.
/// Handles (add base, const), (sub const, x), and pure-constant addresses;
/// always succeeds via the (Addr, 0) fallback.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match an address for a 64-bit (two-dword) DS read2/write2: a shared base
// pointer plus two 8-bit offsets, counted in 4-byte dwords, for the low and
// high halves of the access.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    // Only the larger (second) offset needs to be checked for legality; the
    // first is smaller and therefore legal whenever the second is.
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: materialize a zero base and fold the whole
    // address into the two offsets so the v0 base can be shared.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// MUBUF immediate offsets are unsigned 12-bit values.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return isUInt<12>(Imm);
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isLegalMUBUFImmOffset(Imm->getZExtValue());
}

// Decompose Addr into the full set of MUBUF operands (base pointer, vaddr,
// soffset, immediate offset, and the offen/idxen/addr64 and cache-policy
// bits). Returns false if flat instructions should be used instead.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already have been set by the caller; only default them if not.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the addr64 addressing form of a MUBUF access; only valid on
// generations that still have the addr64 bit (pre-VI).
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    // Wrap the 64-bit pointer in a full resource descriptor.
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload: addr64 form with GLC/TFE defaulted and SLC forced
// to 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// True if the memory access is against a stack pseudo source value (i.e. part
// of a call-sequence argument area).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Returns (address, soffset-register) for a private access: frame indexes are
// folded to a target FI relative to the frame pointer SGPR; anything else is
// relative to the scratch wave offset register.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Select a scratch (private) access in the offen form: vaddr + soffset +
// 12-bit immediate offset against the scratch resource descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Split the constant: high bits go in vaddr, low 12 bits in the
    // immediate offset.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Select a scratch access whose address is a legal 12-bit constant: no vaddr
// needed, just rsrc + soffset + immediate.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the offset-only (no vaddr) MUBUF form, building a default resource
// descriptor around the base pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  // Only usable when none of the addressing-mode bits were needed.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload with all cache-policy bits defaulted.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Overload exposing only SLC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into an soffset part and a 12-bit immediate
// part, preferring values that can be shared between adjacent accesses.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Buffer-intrinsic offset operand: only constants are folded.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Buffer-intrinsic voffset operand: peel a constant part into
// soffset/immediate when legal, leaving the rest as the voffset.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Flat addressing: the address is used directly; SLC defaults to 0.
bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
                                    SDValue &VAddr,
                                    SDValue &SLC) const {
  VAddr = Addr;
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

// Select an SMRD byte offset. On success, Imm reports whether Offset is an
// immediate operand (true) or an SGPR (false).
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Otherwise materialize the offset into an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Select an SMRD address as sbase + offset; falls back to a zero offset.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an immediate offset operand.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a 32-bit literal offset (CI only).
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with an SGPR offset operand.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// SMRD buffer variants: same offset classification, no sbase.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Split a movrel index into base + constant offset.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  // A fully-constant index should be matched by other patterns.
  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA on the shifted value means a signed field extract.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

// Try to match AND/SRL/SRA/SIGN_EXTEND_INREG patterns onto the scalar
// bitfield-extract instructions; falls through to tablegen otherwise.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// True if this brcond's condition can be lowered via SCC (scalar compare)
// rather than VCC.
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    // Only eq/ne 64-bit scalar compares exist, and only on some subtargets.
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  // Otherwise copy the condition into VCC and branch on it.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand onto the selected machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the full register pair; extract the value half.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Fold fneg/fabs wrappers into VOP3 source modifier bits.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// As SelectVOP3Mods, but only matches when the source is known not to be NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources with no fneg/fabs modifier.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// VOP3 source modifiers plus defaulted clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Source with no modifiers but explicit (defaulted) clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

// Look through a bitcast wrapper, if present.
static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
1766 static SDValue stripExtractLoElt(SDValue In) { 1767 if (In.getOpcode() == ISD::TRUNCATE) { 1768 SDValue Src = In.getOperand(0); 1769 if (Src.getValueType().getSizeInBits() == 32) 1770 return stripBitcast(Src); 1771 } 1772 1773 return In; 1774 } 1775 1776 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, 1777 SDValue &SrcMods) const { 1778 unsigned Mods = 0; 1779 Src = In; 1780 1781 if (Src.getOpcode() == ISD::FNEG) { 1782 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); 1783 Src = Src.getOperand(0); 1784 } 1785 1786 if (Src.getOpcode() == ISD::BUILD_VECTOR) { 1787 unsigned VecMods = Mods; 1788 1789 SDValue Lo = stripBitcast(Src.getOperand(0)); 1790 SDValue Hi = stripBitcast(Src.getOperand(1)); 1791 1792 if (Lo.getOpcode() == ISD::FNEG) { 1793 Lo = stripBitcast(Lo.getOperand(0)); 1794 Mods ^= SISrcMods::NEG; 1795 } 1796 1797 if (Hi.getOpcode() == ISD::FNEG) { 1798 Hi = stripBitcast(Hi.getOperand(0)); 1799 Mods ^= SISrcMods::NEG_HI; 1800 } 1801 1802 if (isExtractHiElt(Lo, Lo)) 1803 Mods |= SISrcMods::OP_SEL_0; 1804 1805 if (isExtractHiElt(Hi, Hi)) 1806 Mods |= SISrcMods::OP_SEL_1; 1807 1808 Lo = stripExtractLoElt(Lo); 1809 Hi = stripExtractLoElt(Hi); 1810 1811 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { 1812 // Really a scalar input. Just select from the low half of the register to 1813 // avoid packing. 1814 1815 Src = Lo; 1816 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1817 return true; 1818 } 1819 1820 Mods = VecMods; 1821 } 1822 1823 // Packed instructions do not have abs modifiers. 
1824 Mods |= SISrcMods::OP_SEL_1; 1825 1826 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1827 return true; 1828 } 1829 1830 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, 1831 SDValue &SrcMods, 1832 SDValue &Clamp) const { 1833 SDLoc SL(In); 1834 1835 // FIXME: Handle clamp and op_sel 1836 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1837 1838 return SelectVOP3PMods(In, Src, SrcMods); 1839 } 1840 1841 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1842 const AMDGPUTargetLowering& Lowering = 1843 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1844 bool IsModified = false; 1845 do { 1846 IsModified = false; 1847 // Go over all selected nodes and try to fold them a bit more 1848 for (SDNode &Node : CurDAG->allnodes()) { 1849 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1850 if (!MachineNode) 1851 continue; 1852 1853 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1854 if (ResNode != &Node) { 1855 ReplaceUses(&Node, ResNode); 1856 IsModified = true; 1857 } 1858 } 1859 CurDAG->RemoveDeadNodes(); 1860 } while (IsModified); 1861 } 1862