//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// \brief This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] =
        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                  MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }
  }

  if (!IsRegSeq) {
    // A plain register operand can't go into a REG_SEQUENCE; fall back to the
    // generated matcher, and make sure we don't also morph N below.
    SelectCode(N);
    return;
  }
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
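  // (As a sketch, not the exact emitted MachineIR: SelectADD_SUB_I64 below
  // expands a uniform i64 add into a low add that defines SCC and a high add
  // that consumes it, recombined with a REG_SEQUENCE:
  //   s_add_u32  lo, lhs.lo, rhs.lo   ; defines SCC
  //   s_addc_u32 hi, lhs.hi, rhs.hi   ; consumes SCC
  // )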
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand.
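    // (For instance, offset = 8 and width = 4 are packed as
    // (4 << 16) | 8 == 0x00040008; see getS_BFE below.)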
    // Try to move to the scalar version if the offsets are constant, so that
    // we can try to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
    SelectFMAD(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 suffix. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky.
        // Create a dummy sub node so we can check the known bits in
        // isDSOffsetLegal. We need to emit the selected node here, so this is
        // thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
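      // (The MUBUF immediate offset field is only 12 bits wide, so an offset
      // that is too large for it, but still fits in 32 bits, is materialized
      // into SOffset with s_mov_b32 below.)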
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to
    // the stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
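      // (Worked example, with Align == 4: Imm == 4100 gives ImmOffset =
      //  MaxImm = 4092 and Overflow = 8, which still fits in an inline
      //  constant.)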
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
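    // (On VI+ a constant offset never needs a voffset; on SI/CI we fall
    // through and materialize one only when SelectMUBUFConstant rejects the
    // split because of the SOffset clamping bug above.)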
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
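  // This is done by building a 64-bit REG_SEQUENCE: the original value becomes
  // sub0, and the high half (Info->get32BitAddressHighBits()) is materialized
  // into sub1 with s_mov_b32.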
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
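  // e.g. Offset = 16, Width = 8 packs to (8 << 16) | 16 == 0x00080010.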
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c" ---> "BFE_U32 a, (c-b), (32-c)"
  // "(a << b) sra c" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // (For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU, which inserts the S_AND.)
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND
    // removal, so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                       CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                       Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}
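
// Note: fmad is rewritten to V_MAD_MIX_F32 below only when at least one
// operand is an f16-to-f32 conversion that the mixed-precision source
// modifiers can absorb; otherwise the default patterns apply.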
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
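
// Note on the modifier helpers below: SelectVOP3ModsImpl peels an outer fneg
// and then an fabs off the source, recording them as the NEG and ABS bits of
// the srcN_modifiers operand; matching fneg (fabs x) therefore sets both bits.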
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt =
          dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}
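
// Note: isExtractHiElt above and stripExtractLoElt below let the VOP3P
// selectors see through truncate/shift/bitcast wrappers and refer directly
// to the 16-bit halves of a 32-bit register.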
// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register
      // to avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}
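
// Note: as a concrete example of the mad-mix matching below,
// (f32 (fp_extend (f16 extract-hi x))) selects x with both OP_SEL_1 and
// OP_SEL_0 set: op_sel_hi requests the f16-to-f32 conversion, and op_sel
// picks the high half of the 32-bit register.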
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL,
                                          MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
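
// Note: the R600 selector below shares this file's machinery but uses its own
// register classes; in particular, BUILD_VECTOR must pick an R600 vector
// class by hand, as the switch in Select shows.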
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128-bit register copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    switch (NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}
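
// Note: SelectADDRVTX_READ folds an immediate only when isInt<16> accepts it,
// which presumably mirrors the signed 16-bit offset field of the VTX_READ
// encoding; anything wider stays in the Base operand.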