1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPURegisterInfo.h" 20 #include "AMDGPUSubtarget.h" 21 #include "AMDGPUTargetMachine.h" 22 #include "SIDefines.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "SIMachineFunctionInfo.h" 26 #include "SIRegisterInfo.h" 27 #include "llvm/ADT/APInt.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/CodeGen/FunctionLoweringInfo.h" 32 #include "llvm/CodeGen/ISDOpcodes.h" 33 #include "llvm/CodeGen/MachineFunction.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineValueType.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/SelectionDAGISel.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/Instruction.h" 42 #include "llvm/MC/MCInstrDesc.h" 43 #include "llvm/Support/Casting.h" 44 #include "llvm/Support/CodeGen.h" 45 #include "llvm/Support/ErrorHandling.h" 46 #include "llvm/Support/MathExtras.h" 47 #include <cassert> 48 #include <cstdint> 49 #include <new> 50 #include <vector> 51 52 using namespace llvm; 53 54 namespace llvm { 55 56 class R600InstrInfo; 57 58 } // end namespace llvm 59 60 
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space mapping for the current target (set in the ctor).
  AMDGPUAS AMDGPUASI;
  // Snapshot of AMDGPUTargetMachine::EnableLateStructurizeCFG taken at
  // construction time.
  bool EnableLateStructurizeCFG;

public:
  // NOTE(review): the TM = nullptr default exists so the pass registry can
  // default-construct this pass, but SelectionDAGISel(*TM, ...) dereferences
  // TM unconditionally -- presumably real clients always pass a non-null TM;
  // confirm against the pass-initialization path.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Argument usage info is consumed during selection of call-related nodes.
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  // Shared BUILD_VECTOR/SCALAR_TO_VECTOR -> REG_SEQUENCE lowering, also used
  // by the R600 subclass.
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  // R600-only operand folding helpers (R600InstrInfo is only forward-declared
  // here).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern selectors referenced from the generated matcher
  // (AMDGPUGenDAGISel.inc). Each returns true on a successful match and fills
  // in its output SDValue operands.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  // MUBUF (buffer memory) addressing-mode selectors.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT address-space selectors.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory read) selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs/clamp/omod) selectors.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  // Custom selection for nodes the generated matcher cannot handle (mostly
  // multi-result instructions).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

/// R600 variant of the selector; overrides only the entry point and the two
/// addressing modes that differ from the GCN path.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// \brief This pass converts a legalized DAG into a R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Re-cache the subtarget for each function; it can differ per-function
  // (e.g. with per-function target attributes).
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \returns true if \p N is known to never be a NaN source: either NaNs are
/// globally disabled, the node carries the no-NaNs fast-math flag, or the DAG
/// can prove the value is never NaN.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  // Prefer the node's own fast-math flags when they are present.
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \returns true if \p N is an integer or FP constant whose bit pattern can be
/// encoded as an inline immediate operand on this subtarget. Non-constant
/// nodes are never inline immediates.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    // FP constants are checked by their raw bit pattern.
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
295 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 296 unsigned OpNo) const { 297 if (!N->isMachineOpcode()) { 298 if (N->getOpcode() == ISD::CopyToReg) { 299 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 300 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 301 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 302 return MRI.getRegClass(Reg); 303 } 304 305 const SIRegisterInfo *TRI 306 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 307 return TRI->getPhysRegClass(Reg); 308 } 309 310 return nullptr; 311 } 312 313 switch (N->getMachineOpcode()) { 314 default: { 315 const MCInstrDesc &Desc = 316 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 317 unsigned OpIdx = Desc.getNumDefs() + OpNo; 318 if (OpIdx >= Desc.getNumOperands()) 319 return nullptr; 320 int RegClass = Desc.OpInfo[OpIdx].RegClass; 321 if (RegClass == -1) 322 return nullptr; 323 324 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 325 } 326 case AMDGPU::REG_SEQUENCE: { 327 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 328 const TargetRegisterClass *SuperRC = 329 Subtarget->getRegisterInfo()->getRegClass(RCID); 330 331 SDValue SubRegOp = N->getOperand(OpNo + 1); 332 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 333 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 334 SubRegIdx); 335 } 336 } 337 } 338 339 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 340 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || 341 !Subtarget->ldsRequiresM0Init()) 342 return N; 343 344 const SITargetLowering& Lowering = 345 *static_cast<const SITargetLowering*>(getTargetLowering()); 346 347 // Write max value to m0 before each load operation 348 349 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 350 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 351 352 SDValue Glue = M0.getValue(1); 353 354 
SmallVector <SDValue, 8> Ops; 355 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 356 Ops.push_back(N->getOperand(i)); 357 } 358 Ops.push_back(Glue); 359 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 360 } 361 362 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 363 switch (NumVectorElts) { 364 case 1: 365 return AMDGPU::SReg_32_XM0RegClassID; 366 case 2: 367 return AMDGPU::SReg_64RegClassID; 368 case 4: 369 return AMDGPU::SReg_128RegClassID; 370 case 8: 371 return AMDGPU::SReg_256RegClassID; 372 case 16: 373 return AMDGPU::SReg_512RegClassID; 374 } 375 376 llvm_unreachable("invalid vector size"); 377 } 378 379 static bool getConstantValue(SDValue N, uint32_t &Out) { 380 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 381 Out = C->getAPIntValue().getZExtValue(); 382 return true; 383 } 384 385 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 386 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 387 return true; 388 } 389 390 return false; 391 } 392 393 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 394 EVT VT = N->getValueType(0); 395 unsigned NumVectorElts = VT.getVectorNumElements(); 396 EVT EltVT = VT.getVectorElementType(); 397 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 398 SDLoc DL(N); 399 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 400 401 if (NumVectorElts == 1) { 402 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 403 RegClass); 404 return; 405 } 406 407 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 408 "supported yet"); 409 // 16 = Max Num Vector Elements 410 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 411 // 1 = Vector Register Class 412 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 413 414 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 415 bool IsRegSeq = true; 416 unsigned 
NOps = N->getNumOperands(); 417 for (unsigned i = 0; i < NOps; i++) { 418 // XXX: Why is this here? 419 if (isa<RegisterSDNode>(N->getOperand(i))) { 420 IsRegSeq = false; 421 break; 422 } 423 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 424 RegSeqArgs[1 + (2 * i) + 1] = 425 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 426 MVT::i32); 427 } 428 if (NOps != NumVectorElts) { 429 // Fill in the missing undef elements if this was a scalar_to_vector. 430 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 431 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 432 DL, EltVT); 433 for (unsigned i = NOps; i < NumVectorElts; ++i) { 434 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 435 RegSeqArgs[1 + (2 * i) + 1] = 436 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 437 } 438 } 439 440 if (!IsRegSeq) 441 SelectCode(N); 442 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 443 } 444 445 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 446 unsigned int Opc = N->getOpcode(); 447 if (N->isMachineOpcode()) { 448 N->setNodeId(-1); 449 return; // Already selected. 450 } 451 452 if (isa<AtomicSDNode>(N) || 453 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 454 N = glueCopyToM0(N); 455 456 switch (Opc) { 457 default: 458 break; 459 // We are selecting i64 ADD here instead of custom lower it during 460 // DAG legalization, so we can fold some i64 ADDs used for address 461 // calculation into the LOAD and STORE instructions. 
462 case ISD::ADDC: 463 case ISD::ADDE: 464 case ISD::SUBC: 465 case ISD::SUBE: { 466 if (N->getValueType(0) != MVT::i64) 467 break; 468 469 SelectADD_SUB_I64(N); 470 return; 471 } 472 case ISD::UADDO: 473 case ISD::USUBO: { 474 SelectUADDO_USUBO(N); 475 return; 476 } 477 case AMDGPUISD::FMUL_W_CHAIN: { 478 SelectFMUL_W_CHAIN(N); 479 return; 480 } 481 case AMDGPUISD::FMA_W_CHAIN: { 482 SelectFMA_W_CHAIN(N); 483 return; 484 } 485 486 case ISD::SCALAR_TO_VECTOR: 487 case ISD::BUILD_VECTOR: { 488 EVT VT = N->getValueType(0); 489 unsigned NumVectorElts = VT.getVectorNumElements(); 490 491 if (VT == MVT::v2i16 || VT == MVT::v2f16) { 492 if (Opc == ISD::BUILD_VECTOR) { 493 uint32_t LHSVal, RHSVal; 494 if (getConstantValue(N->getOperand(0), LHSVal) && 495 getConstantValue(N->getOperand(1), RHSVal)) { 496 uint32_t K = LHSVal | (RHSVal << 16); 497 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 498 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 499 return; 500 } 501 } 502 503 break; 504 } 505 506 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 507 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 508 SelectBuildVector(N, RegClassID); 509 return; 510 } 511 case ISD::BUILD_PAIR: { 512 SDValue RC, SubReg0, SubReg1; 513 SDLoc DL(N); 514 if (N->getValueType(0) == MVT::i128) { 515 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 516 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 517 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 518 } else if (N->getValueType(0) == MVT::i64) { 519 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 520 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 521 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 522 } else { 523 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 524 } 525 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 526 N->getOperand(1), SubReg1 }; 527 ReplaceNode(N, 
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 528 N->getValueType(0), Ops)); 529 return; 530 } 531 532 case ISD::Constant: 533 case ISD::ConstantFP: { 534 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 535 break; 536 537 uint64_t Imm; 538 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 539 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 540 else { 541 ConstantSDNode *C = cast<ConstantSDNode>(N); 542 Imm = C->getZExtValue(); 543 } 544 545 SDLoc DL(N); 546 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 547 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 548 MVT::i32)); 549 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 550 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 551 const SDValue Ops[] = { 552 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 553 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 554 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 555 }; 556 557 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 558 N->getValueType(0), Ops)); 559 return; 560 } 561 case ISD::LOAD: 562 case ISD::STORE: { 563 N = glueCopyToM0(N); 564 break; 565 } 566 567 case AMDGPUISD::BFE_I32: 568 case AMDGPUISD::BFE_U32: { 569 // There is a scalar version available, but unlike the vector version which 570 // has a separate operand for the offset and width, the scalar version packs 571 // the width and offset into a single operand. Try to move to the scalar 572 // version if the offsets are constant, so that we can try to keep extended 573 // loads of kernel arguments in SGPRs. 574 575 // TODO: Technically we could try to pattern match scalar bitshifts of 576 // dynamic values, but it's probably not useful. 
577 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 578 if (!Offset) 579 break; 580 581 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 582 if (!Width) 583 break; 584 585 bool Signed = Opc == AMDGPUISD::BFE_I32; 586 587 uint32_t OffsetVal = Offset->getZExtValue(); 588 uint32_t WidthVal = Width->getZExtValue(); 589 590 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 591 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 592 return; 593 } 594 case AMDGPUISD::DIV_SCALE: { 595 SelectDIV_SCALE(N); 596 return; 597 } 598 case AMDGPUISD::MAD_I64_I32: 599 case AMDGPUISD::MAD_U64_U32: { 600 SelectMAD_64_32(N); 601 return; 602 } 603 case ISD::CopyToReg: { 604 const SITargetLowering& Lowering = 605 *static_cast<const SITargetLowering*>(getTargetLowering()); 606 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 607 break; 608 } 609 case ISD::AND: 610 case ISD::SRL: 611 case ISD::SRA: 612 case ISD::SIGN_EXTEND_INREG: 613 if (N->getValueType(0) != MVT::i32) 614 break; 615 616 SelectS_BFE(N); 617 return; 618 case ISD::BRCOND: 619 SelectBRCOND(N); 620 return; 621 case ISD::FMAD: 622 SelectFMAD(N); 623 return; 624 case AMDGPUISD::ATOMIC_CMP_SWAP: 625 SelectATOMIC_CMP_SWAP(N); 626 return; 627 } 628 629 SelectCode(N); 630 } 631 632 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 633 if (!N->readMem()) 634 return false; 635 if (CbId == -1) 636 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; 637 638 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 639 } 640 641 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 642 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 643 const Instruction *Term = BB->getTerminator(); 644 return Term->getMetadata("amdgpu.uniform") || 645 Term->getMetadata("structurizecfg.uniform"); 646 } 647 648 StringRef AMDGPUDAGToDAGISel::getPassName() const { 649 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 650 } 
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

// Match a plain constant address; the returned pointer is in dwords
// (byte address / 4).
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

// Match any non-constant address as (BaseReg, offset 0). Constants are left
// for SelectGlobalValueConstantOffset.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

// Base implementation never matches; overridden by R600DAGToDAGISel.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

// Split an indirect address into (Base, Offset). Always succeeds: unmatched
// shapes fall through to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Pure constant: use the indirect base register with the whole value as
    // offset.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, const) -- OR is treated like ADD, presumably only reached
    // when the low bits are known clear.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Select a 64-bit scalar add/sub as two 32-bit halves chained through the
// carry bit (S_ADD_U32 + S_ADDC_U32 / S_SUB_U32 + S_SUBB_U32), reassembled
// with a REG_SEQUENCE.
// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE read an incoming carry (operand 2); ADDC/SUBC only produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit low/high halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Incoming carry is the glue operand of the low-half carry op.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the low half's carry (glue result 1).
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

// Select FMA_W_CHAIN to V_FMA_F32, filling in source modifiers for each of
// the three sources (operand 0 is the chain, operand 4 the glue).
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);  // chain
  Ops[9] = N->getOperand(4);  // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

// Select FMUL_W_CHAIN to V_MUL_F32_e64 analogously to SelectFMA_W_CHAIN.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);  // chain
  Ops[7] = N->getOperand(3);  // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  // clamp modifier operand, always off.
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// \returns true if \p Offset fits in a DS-instruction offset field of
// \p OffsetBits bits (8 or 16) and the base is safe to use with it on this
// subtarget.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// Match a DS address as (Base, 16-bit immediate Offset). Always succeeds;
// unmatched shapes use offset 0.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a 64-bit DS access as two dword accesses (read2/write2): split Addr
// into a base plus two 8-bit dword-granularity offsets, where
// Offset1 == Offset0 + 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // read2/write2 offsets are encoded in dwords, not bytes.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: use a zero base so the register can be shared.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// Decompose an address into the full set of MUBUF operands, deciding between
// the addr64 and plain-offset addressing forms.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default any cache-control operands the caller didn't pre-populate.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the addr64 MUBUF form: wraps the pointer chosen by SelectMUBUF into
// an addr64 resource descriptor. Fails when SelectMUBUF did not pick the
// addr64 form, or on VI+ where the addr64 bit no longer exists.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Overload without GLC/TFE outputs; SLC is pre-initialized to 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// Returns true if the access is known to be relative to the stack (its
// pseudo source value reports isStack()).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Pick the (address, soffset-register) pair for a private (scratch) access.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Match a scratch (private) address for the offen MUBUF form: produces the
// scratch rsrc, a VGPR address, an SGPR offset register and a 12-bit
// immediate offset.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    // Materialize the bits above 4095 in the VGPR address and keep the low
    // 12 bits in the immediate offset.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Match a scratch access with no VGPR address: the whole address must be a
// constant that is a legal MUBUF immediate offset.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the plain-offset MUBUF form (no vaddr); builds a default resource
// descriptor around the pointer. Only matches when SelectMUBUF chose neither
// offen, idxen nor addr64.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload that discards the GLC/SLC/TFE bits.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
// Overload that keeps SLC but discards GLC/TFE.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a 4-byte-aligned SOffset part and an
// immediate part that fits the 12-bit offset field.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows can use an inline constant; larger ones need an s_mov.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Intrinsic buffer offsets only fold when they are compile-time constants.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Split an intrinsic buffer offset into VGPR, SGPR and immediate parts.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
1336 if (isa<ConstantSDNode>(Offset)) { 1337 SDValue Tmp1, Tmp2; 1338 1339 // When necessary, use a voffset in <= CI anyway to work around a hardware 1340 // bug. 1341 if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS || 1342 SelectMUBUFConstant(Offset, Tmp1, Tmp2)) 1343 return false; 1344 } 1345 1346 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1347 SDValue N0 = Offset.getOperand(0); 1348 SDValue N1 = Offset.getOperand(1); 1349 if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 && 1350 SelectMUBUFConstant(N1, SOffset, ImmOffset)) { 1351 VOffset = N0; 1352 return true; 1353 } 1354 } 1355 1356 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1357 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1358 VOffset = Offset; 1359 1360 return true; 1361 } 1362 1363 template <bool IsSigned> 1364 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, 1365 SDValue &VAddr, 1366 SDValue &Offset, 1367 SDValue &SLC) const { 1368 int64_t OffsetVal = 0; 1369 1370 if (Subtarget->hasFlatInstOffsets() && 1371 CurDAG->isBaseWithConstantOffset(Addr)) { 1372 SDValue N0 = Addr.getOperand(0); 1373 SDValue N1 = Addr.getOperand(1); 1374 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); 1375 1376 if ((IsSigned && isInt<13>(COffsetVal)) || 1377 (!IsSigned && isUInt<12>(COffsetVal))) { 1378 Addr = N0; 1379 OffsetVal = COffsetVal; 1380 } 1381 } 1382 1383 VAddr = Addr; 1384 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); 1385 SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1386 1387 return true; 1388 } 1389 1390 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, 1391 SDValue &VAddr, 1392 SDValue &Offset, 1393 SDValue &SLC) const { 1394 return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC); 1395 } 1396 1397 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr, 1398 SDValue &VAddr, 1399 SDValue &Offset, 1400 SDValue &SLC) const { 1401 return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC); 1402 } 1403 1404 bool 
AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1405 SDValue &Offset, bool &Imm) const { 1406 1407 // FIXME: Handle non-constant offsets. 1408 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1409 if (!C) 1410 return false; 1411 1412 SDLoc SL(ByteOffsetNode); 1413 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1414 int64_t ByteOffset = C->getSExtValue(); 1415 int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); 1416 1417 if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { 1418 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1419 Imm = true; 1420 return true; 1421 } 1422 1423 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1424 return false; 1425 1426 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1427 // 32-bit Immediates are supported on Sea Islands. 1428 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1429 } else { 1430 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1431 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1432 C32Bit), 0); 1433 } 1434 Imm = false; 1435 return true; 1436 } 1437 1438 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1439 SDValue &Offset, bool &Imm) const { 1440 SDLoc SL(Addr); 1441 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1442 SDValue N0 = Addr.getOperand(0); 1443 SDValue N1 = Addr.getOperand(1); 1444 1445 if (SelectSMRDOffset(N1, Offset, Imm)) { 1446 SBase = N0; 1447 return true; 1448 } 1449 } 1450 SBase = Addr; 1451 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1452 Imm = true; 1453 return true; 1454 } 1455 1456 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1457 SDValue &Offset) const { 1458 bool Imm; 1459 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1460 } 1461 1462 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1463 SDValue &Offset) const { 1464 1465 if 
(Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1466 return false; 1467 1468 bool Imm; 1469 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1470 return false; 1471 1472 return !Imm && isa<ConstantSDNode>(Offset); 1473 } 1474 1475 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1476 SDValue &Offset) const { 1477 bool Imm; 1478 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1479 !isa<ConstantSDNode>(Offset); 1480 } 1481 1482 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1483 SDValue &Offset) const { 1484 bool Imm; 1485 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1486 } 1487 1488 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1489 SDValue &Offset) const { 1490 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1491 return false; 1492 1493 bool Imm; 1494 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1495 return false; 1496 1497 return !Imm && isa<ConstantSDNode>(Offset); 1498 } 1499 1500 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, 1501 SDValue &Base, 1502 SDValue &Offset) const { 1503 SDLoc DL(Index); 1504 1505 if (CurDAG->isBaseWithConstantOffset(Index)) { 1506 SDValue N0 = Index.getOperand(0); 1507 SDValue N1 = Index.getOperand(1); 1508 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1509 1510 // (add n0, c0) 1511 Base = N0; 1512 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); 1513 return true; 1514 } 1515 1516 if (isa<ConstantSDNode>(Index)) 1517 return false; 1518 1519 Base = Index; 1520 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1521 return true; 1522 } 1523 1524 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1525 SDValue Val, uint32_t Offset, 1526 uint32_t Width) { 1527 // Transformation function, pack the offset and width of a BFE into 1528 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1529 // source, bits [5:0] contain the offset and bits [22:16] the width. 
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

// Try to fold a shift-left followed by a right shift into a single scalar
// bitfield extract.
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // An arithmetic right shift selects the signed BFE form.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  // Not a BFE pattern; fall back to normal selection.
  SelectCode(N);
}

// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG nodes as scalar bitfield
// extract (S_BFE_*) instructions.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Returns true if this BRCOND's condition can be selected as a branch on SCC:
// a one-use setcc whose operand type the scalar unit can compare.
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  // Look through a CopyToReg of the condition.
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    // 64-bit scalar compares are limited to equality, and only on subtargets
    // that have them.
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Select a conditional branch through either SCC (uniform) or VCC
// (divergent) depending on the condition.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// Select an f32 FMAD as V_MAD_MIX_F32 when at least one source uses a
// conversion from f16; otherwise fall back to normal selection.
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // Flat-address cmpswap goes through the generated matcher.
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 buffer form when the subtarget has it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Otherwise try the plain-offset buffer form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand from the original node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The result callers want lives in the low subregister(s) of the buffer
  // instruction's result; extract it.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Strip fneg/fabs wrappers off of In, accumulating the corresponding
// SISrcMods bits and returning the underlying source.
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

// Wraps SelectVOP3ModsImpl, packaging the modifier bits as a target-constant
// operand.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

// Like SelectVOP3Mods, but only matches when the stripped source is known not
// to be a NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources that carry no fneg/fabs modifier.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool
AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1833 SDValue &SrcMods, SDValue &Clamp, 1834 SDValue &Omod) const { 1835 SDLoc DL(In); 1836 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); 1837 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); 1838 1839 return SelectVOP3Mods(In, Src, SrcMods); 1840 } 1841 1842 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1843 SDValue &SrcMods, 1844 SDValue &Clamp, 1845 SDValue &Omod) const { 1846 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1847 return SelectVOP3Mods(In, Src, SrcMods); 1848 } 1849 1850 bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, 1851 SDValue &Clamp, SDValue &Omod) const { 1852 Src = In; 1853 1854 SDLoc DL(In); 1855 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); 1856 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); 1857 1858 return true; 1859 } 1860 1861 static SDValue stripBitcast(SDValue Val) { 1862 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; 1863 } 1864 1865 // Figure out if this is really an extract of the high 16-bits of a dword. 1866 static bool isExtractHiElt(SDValue In, SDValue &Out) { 1867 In = stripBitcast(In); 1868 if (In.getOpcode() != ISD::TRUNCATE) 1869 return false; 1870 1871 SDValue Srl = In.getOperand(0); 1872 if (Srl.getOpcode() == ISD::SRL) { 1873 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { 1874 if (ShiftAmt->getZExtValue() == 16) { 1875 Out = stripBitcast(Srl.getOperand(0)); 1876 return true; 1877 } 1878 } 1879 } 1880 1881 return false; 1882 } 1883 1884 // Look through operations that obscure just looking at the low 16-bits of the 1885 // same register. 
static SDValue stripExtractLoElt(SDValue In) {
  // A truncate of a full 32-bit value reads the same bits as using the low
  // half of the wider register directly.
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

// Select source modifiers for packed (VOP3P) operands: per-half neg bits and
// op_sel half selection, folded from fneg / build_vector / hi-half extracts.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  // An fneg of the whole vector negates both halves.
  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the modifiers matched so far in case the per-element match
    // below does not pan out.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    // A per-element fneg toggles (xor) its half's bit, so it composes
    // correctly with a whole-vector fneg folded above.
    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // An element taken from the high half of a dword is selected via op_sel
    // rather than emitting an extract.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    // Per-element match failed; keep only the whole-vector modifiers.
    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.

  // NOTE(review): OP_SEL_1 looks like the neutral default here (the high
  // result reads the high half) — confirm against the SISrcMods encoding.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// Packed source-0 variant: also produces a zeroed clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

// op_sel matching is not implemented yet: the source is passed through with
// zeroed modifier bits.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

// op_sel variant that still folds ordinary neg/abs source modifiers.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
1999 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, 2000 unsigned &Mods) const { 2001 Mods = 0; 2002 SelectVOP3ModsImpl(In, Src, Mods); 2003 2004 if (Src.getOpcode() == ISD::FP_EXTEND) { 2005 Src = Src.getOperand(0); 2006 assert(Src.getValueType() == MVT::f16); 2007 Src = stripBitcast(Src); 2008 2009 // Be careful about folding modifiers if we already have an abs. fneg is 2010 // applied last, so we don't want to apply an earlier fneg. 2011 if ((Mods & SISrcMods::ABS) == 0) { 2012 unsigned ModsTmp; 2013 SelectVOP3ModsImpl(Src, Src, ModsTmp); 2014 2015 if ((ModsTmp & SISrcMods::NEG) != 0) 2016 Mods ^= SISrcMods::NEG; 2017 2018 if ((ModsTmp & SISrcMods::ABS) != 0) 2019 Mods |= SISrcMods::ABS; 2020 } 2021 2022 // op_sel/op_sel_hi decide the source type and source. 2023 // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. 2024 // If the sources's op_sel is set, it picks the high half of the source 2025 // register. 2026 2027 Mods |= SISrcMods::OP_SEL_1; 2028 if (isExtractHiElt(Src, Src)) { 2029 Mods |= SISrcMods::OP_SEL_0; 2030 2031 // TODO: Should we try to look for neg/abs here? 2032 } 2033 2034 return true; 2035 } 2036 2037 return false; 2038 } 2039 2040 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, 2041 SDValue &SrcMods) const { 2042 unsigned Mods = 0; 2043 SelectVOP3PMadMixModsImpl(In, Src, Mods); 2044 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 2045 return true; 2046 } 2047 2048 // TODO: Can we identify things like v_mad_mixhi_f16? 
// Match a value usable as the high 16-bit half of a packed register: undef,
// constants re-materialized pre-shifted into the high half, or an existing
// extract of a dword's high 16 bits.
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  // Materialize an integer constant already shifted into the high 16 bits.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  // Same for FP constants: move the raw bit pattern into the high half.
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  // Otherwise only an explicit extract of the high 16 bits matches.
  return isExtractHiElt(In, Src);
}

// Run target-specific folds (PostISelFolding) over the already-selected
// machine DAG, iterating to a fixed point.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      // Advance before folding: the current node may be replaced/removed.
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// R600 instruction selection entry point: handles vector-build nodes
// specially, everything else goes to the generated matcher.
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

// Split an indirect address into a base register and an immediate offset.
// Always matches; the default case uses the address itself with offset 0.
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Fully-constant address: base is the indirect base register.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // Constant wrapped in DWORDADDR: treated the same.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // NOTE(review): OR is treated like ADD here — presumably valid because
    // these address computations use disjoint bits; confirm.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Split a VTX_READ address into base + 16-bit immediate offset.
// Always matches; the default case uses the address itself with offset 0.
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  // NOTE(review): isInt<16> is applied to a zero-extended value — verify the
  // intended signedness of the offset field.
  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    // Base becomes the zero register; the whole address is the offset.
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}