1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPURegisterInfo.h" 20 #include "AMDGPUSubtarget.h" 21 #include "AMDGPUTargetMachine.h" 22 #include "SIDefines.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "SIMachineFunctionInfo.h" 26 #include "SIRegisterInfo.h" 27 #include "llvm/ADT/APInt.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/CodeGen/FunctionLoweringInfo.h" 32 #include "llvm/CodeGen/ISDOpcodes.h" 33 #include "llvm/CodeGen/MachineFunction.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineValueType.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/SelectionDAGISel.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/Instruction.h" 42 #include "llvm/MC/MCInstrDesc.h" 43 #include "llvm/Support/Casting.h" 44 #include "llvm/Support/CodeGen.h" 45 #include "llvm/Support/ErrorHandling.h" 46 #include "llvm/Support/MathExtras.h" 47 #include <cassert> 48 #include <cstdint> 49 #include <new> 50 #include <vector> 51 52 using namespace llvm; 53 54 namespace llvm { 55 56 class R600InstrInfo; 57 58 } // end namespace llvm 59 60 
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Address-space numbering for the current target, queried in the ctor.
  AMDGPUAS AMDGPUASI;
  // Cached copy of AMDGPUTargetMachine::EnableLateStructurizeCFG.
  bool EnableLateStructurizeCFG;

public:
  // NOTE(review): TM defaults to nullptr but is dereferenced unconditionally
  // below; the default appears to exist only for pass-registry default
  // construction — confirm it is never invoked with a null TM.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  // R600 operand-folding helpers (TII is the R600 instruction info).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // ComplexPattern address matchers referenced from the TableGen patterns.
  // Each decomposes an address SDValue into the operand pieces of a specific
  // addressing mode and returns true on a successful match.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 / VOP3P source-modifier matchers (abs/neg/clamp/omod operand packing).
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  // Custom selection for nodes TableGen cannot match directly (multiple
  // results, glue, or target-specific expansion).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

/// R600-family variant of the selector; overrides only the entry point and
/// the two addressing-mode matchers that differ on R600.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

}  // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// \brief This pass converts a legalized DAG into a R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Refresh the cached subtarget for this function before running the
  // generic SelectionDAG ISel driver.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \brief Returns true if \p N is known to never be NaN: either globally via
/// -enable-no-nans-fp-math, via the node's own no-NaNs flag, or via DAG
/// known-bits style analysis.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \brief Returns true if \p N is an integer or FP constant whose bit pattern
/// can be encoded as an SI inline immediate operand.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
295 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 296 unsigned OpNo) const { 297 if (!N->isMachineOpcode()) { 298 if (N->getOpcode() == ISD::CopyToReg) { 299 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 300 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 301 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 302 return MRI.getRegClass(Reg); 303 } 304 305 const SIRegisterInfo *TRI 306 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 307 return TRI->getPhysRegClass(Reg); 308 } 309 310 return nullptr; 311 } 312 313 switch (N->getMachineOpcode()) { 314 default: { 315 const MCInstrDesc &Desc = 316 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 317 unsigned OpIdx = Desc.getNumDefs() + OpNo; 318 if (OpIdx >= Desc.getNumOperands()) 319 return nullptr; 320 int RegClass = Desc.OpInfo[OpIdx].RegClass; 321 if (RegClass == -1) 322 return nullptr; 323 324 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 325 } 326 case AMDGPU::REG_SEQUENCE: { 327 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 328 const TargetRegisterClass *SuperRC = 329 Subtarget->getRegisterInfo()->getRegClass(RCID); 330 331 SDValue SubRegOp = N->getOperand(OpNo + 1); 332 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 333 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 334 SubRegIdx); 335 } 336 } 337 } 338 339 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 340 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || 341 !Subtarget->ldsRequiresM0Init()) 342 return N; 343 344 const SITargetLowering& Lowering = 345 *static_cast<const SITargetLowering*>(getTargetLowering()); 346 347 // Write max value to m0 before each load operation 348 349 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 350 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 351 352 SDValue Glue = M0.getValue(1); 353 354 
SmallVector <SDValue, 8> Ops; 355 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 356 Ops.push_back(N->getOperand(i)); 357 } 358 Ops.push_back(Glue); 359 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 360 361 return N; 362 } 363 364 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 365 switch (NumVectorElts) { 366 case 1: 367 return AMDGPU::SReg_32_XM0RegClassID; 368 case 2: 369 return AMDGPU::SReg_64RegClassID; 370 case 4: 371 return AMDGPU::SReg_128RegClassID; 372 case 8: 373 return AMDGPU::SReg_256RegClassID; 374 case 16: 375 return AMDGPU::SReg_512RegClassID; 376 } 377 378 llvm_unreachable("invalid vector size"); 379 } 380 381 static bool getConstantValue(SDValue N, uint32_t &Out) { 382 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 383 Out = C->getAPIntValue().getZExtValue(); 384 return true; 385 } 386 387 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 388 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 389 return true; 390 } 391 392 return false; 393 } 394 395 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 396 EVT VT = N->getValueType(0); 397 unsigned NumVectorElts = VT.getVectorNumElements(); 398 EVT EltVT = VT.getVectorElementType(); 399 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 400 SDLoc DL(N); 401 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 402 403 if (NumVectorElts == 1) { 404 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 405 RegClass); 406 return; 407 } 408 409 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 410 "supported yet"); 411 // 16 = Max Num Vector Elements 412 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 413 // 1 = Vector Register Class 414 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 415 416 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 417 bool IsRegSeq = true; 418 
unsigned NOps = N->getNumOperands(); 419 for (unsigned i = 0; i < NOps; i++) { 420 // XXX: Why is this here? 421 if (isa<RegisterSDNode>(N->getOperand(i))) { 422 IsRegSeq = false; 423 break; 424 } 425 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 426 RegSeqArgs[1 + (2 * i) + 1] = 427 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 428 MVT::i32); 429 } 430 if (NOps != NumVectorElts) { 431 // Fill in the missing undef elements if this was a scalar_to_vector. 432 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 433 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 434 DL, EltVT); 435 for (unsigned i = NOps; i < NumVectorElts; ++i) { 436 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 437 RegSeqArgs[1 + (2 * i) + 1] = 438 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 439 } 440 } 441 442 if (!IsRegSeq) 443 SelectCode(N); 444 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 445 } 446 447 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 448 unsigned int Opc = N->getOpcode(); 449 if (N->isMachineOpcode()) { 450 N->setNodeId(-1); 451 return; // Already selected. 452 } 453 454 if (isa<AtomicSDNode>(N) || 455 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 456 N = glueCopyToM0(N); 457 458 switch (Opc) { 459 default: break; 460 // We are selecting i64 ADD here instead of custom lower it during 461 // DAG legalization, so we can fold some i64 ADDs used for address 462 // calculation into the LOAD and STORE instructions. 
463 case ISD::ADDC: 464 case ISD::ADDE: 465 case ISD::SUBC: 466 case ISD::SUBE: { 467 if (N->getValueType(0) != MVT::i64) 468 break; 469 470 SelectADD_SUB_I64(N); 471 return; 472 } 473 case ISD::UADDO: 474 case ISD::USUBO: { 475 SelectUADDO_USUBO(N); 476 return; 477 } 478 case AMDGPUISD::FMUL_W_CHAIN: { 479 SelectFMUL_W_CHAIN(N); 480 return; 481 } 482 case AMDGPUISD::FMA_W_CHAIN: { 483 SelectFMA_W_CHAIN(N); 484 return; 485 } 486 487 case ISD::SCALAR_TO_VECTOR: 488 case ISD::BUILD_VECTOR: { 489 EVT VT = N->getValueType(0); 490 unsigned NumVectorElts = VT.getVectorNumElements(); 491 492 if (VT == MVT::v2i16 || VT == MVT::v2f16) { 493 if (Opc == ISD::BUILD_VECTOR) { 494 uint32_t LHSVal, RHSVal; 495 if (getConstantValue(N->getOperand(0), LHSVal) && 496 getConstantValue(N->getOperand(1), RHSVal)) { 497 uint32_t K = LHSVal | (RHSVal << 16); 498 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 499 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 500 return; 501 } 502 } 503 504 break; 505 } 506 507 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 508 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 509 SelectBuildVector(N, RegClassID); 510 return; 511 } 512 case ISD::BUILD_PAIR: { 513 SDValue RC, SubReg0, SubReg1; 514 SDLoc DL(N); 515 if (N->getValueType(0) == MVT::i128) { 516 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 517 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 518 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 519 } else if (N->getValueType(0) == MVT::i64) { 520 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 521 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 522 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 523 } else { 524 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 525 } 526 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 527 N->getOperand(1), SubReg1 }; 528 ReplaceNode(N, 
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 529 N->getValueType(0), Ops)); 530 return; 531 } 532 533 case ISD::Constant: 534 case ISD::ConstantFP: { 535 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 536 break; 537 538 uint64_t Imm; 539 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 540 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 541 else { 542 ConstantSDNode *C = cast<ConstantSDNode>(N); 543 Imm = C->getZExtValue(); 544 } 545 546 SDLoc DL(N); 547 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 548 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 549 MVT::i32)); 550 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 551 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 552 const SDValue Ops[] = { 553 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 554 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 555 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 556 }; 557 558 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 559 N->getValueType(0), Ops)); 560 return; 561 } 562 case ISD::LOAD: 563 case ISD::STORE: { 564 N = glueCopyToM0(N); 565 break; 566 } 567 568 case AMDGPUISD::BFE_I32: 569 case AMDGPUISD::BFE_U32: { 570 // There is a scalar version available, but unlike the vector version which 571 // has a separate operand for the offset and width, the scalar version packs 572 // the width and offset into a single operand. Try to move to the scalar 573 // version if the offsets are constant, so that we can try to keep extended 574 // loads of kernel arguments in SGPRs. 575 576 // TODO: Technically we could try to pattern match scalar bitshifts of 577 // dynamic values, but it's probably not useful. 
578 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 579 if (!Offset) 580 break; 581 582 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 583 if (!Width) 584 break; 585 586 bool Signed = Opc == AMDGPUISD::BFE_I32; 587 588 uint32_t OffsetVal = Offset->getZExtValue(); 589 uint32_t WidthVal = Width->getZExtValue(); 590 591 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 592 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 593 return; 594 } 595 case AMDGPUISD::DIV_SCALE: { 596 SelectDIV_SCALE(N); 597 return; 598 } 599 case AMDGPUISD::MAD_I64_I32: 600 case AMDGPUISD::MAD_U64_U32: { 601 SelectMAD_64_32(N); 602 return; 603 } 604 case ISD::CopyToReg: { 605 const SITargetLowering& Lowering = 606 *static_cast<const SITargetLowering*>(getTargetLowering()); 607 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 608 break; 609 } 610 case ISD::AND: 611 case ISD::SRL: 612 case ISD::SRA: 613 case ISD::SIGN_EXTEND_INREG: 614 if (N->getValueType(0) != MVT::i32) 615 break; 616 617 SelectS_BFE(N); 618 return; 619 case ISD::BRCOND: 620 SelectBRCOND(N); 621 return; 622 case ISD::FMAD: 623 SelectFMAD(N); 624 return; 625 case AMDGPUISD::ATOMIC_CMP_SWAP: 626 SelectATOMIC_CMP_SWAP(N); 627 return; 628 } 629 630 SelectCode(N); 631 } 632 633 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 634 if (!N->readMem()) 635 return false; 636 if (CbId == -1) 637 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; 638 639 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 640 } 641 642 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 643 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 644 const Instruction *Term = BB->getTerminator(); 645 return Term->getMetadata("amdgpu.uniform") || 646 Term->getMetadata("structurizecfg.uniform"); 647 } 648 649 StringRef AMDGPUDAGToDAGISel::getPassName() const { 650 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 651 } 
//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

// Matches a constant global-value address; returns the constant scaled to
// dword units (divided by 4).
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

// Matches any non-constant address as (base, 0); constants are handled by
// SelectGlobalValueConstantOffset instead.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

// Base implementation never matches; R600DAGToDAGISel overrides this.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

// Splits an indirect address into (base register, constant offset). Always
// succeeds: the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Pure constant address: base is the indirect base address register.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, const) -> base + const offset.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Expands a 64-bit add/sub (with or without carry) into a pair of 32-bit
// scalar ops chained through SCC, then recombines the halves with a
// REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into their 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: use the carry-consuming opcode when there is an incoming carry
  // (operand 2 of ADDE/SUBE).
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the glue (carry) produced by the low half.
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

// Selects a chained FMA, matching the source modifiers of all three inputs
// and threading the chain/glue through operands 8-9.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2,
  // clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);   // chain
  Ops[9] = N->getOperand(4);   // glue
  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

// Selects a chained FMUL; same structure as SelectFMA_W_CHAIN with two
// sources.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);   // chain
  Ops[7] = N->getOperand(3);   // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  // Clamp operand is always 0 here.
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// Returns true when \p Offset fits in \p OffsetBits (8 or 16) unsigned bits
// AND folding it is safe for the target generation (pre-CI requires a
// provably non-negative base unless unsafe folding is enabled).
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// Matches a DS address as (base, 16-bit immediate offset). Always succeeds;
// the fallback is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // Emit the real negation as a machine node.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a 64-bit DS access for read2/write2: a shared VGPR base plus two
// 8-bit dword-granular offsets addressing consecutive 32-bit slots.
// Always succeeds; the default case uses dword offsets 0 and 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // read2/write2 offsets are in units of dwords, not bytes.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: share a zero base register and put everything in the
    // immediate offsets (same rationale as in SelectDS1Addr1Offset).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// Decompose Addr into the operand bundle of a MUBUF instruction, choosing
// between addr64 and pure-offset addressing. Offen/Idxen are always produced
// as 0 here. Fails only when flat instructions are preferred for globals.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already be set by the caller; only default them when absent.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the addr64 MUBUF addressing form, wrapping the pointer into an
// addr64 resource descriptor. Fails when SelectMUBUF did not pick addr64 or
// when the subtarget lacks the addr64 bit.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload without GLC/TFE outputs; SLC is preset to 0 before
// delegating to the full form.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// Returns true if the access is known to be relative to the stack pointer
// pseudo source value (i.e. part of a call argument area).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Resolve a private address to (address, soffset-register) for scratch
// accesses: frame indexes use the frame pointer SGPR, anything else the
// scratch wave offset SGPR.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Select a scratch (private) access in the offen form: a VGPR address plus
// the scratch rsrc, an SGPR soffset, and a 12-bit immediate offset.
// Always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    // Materialize the bits above 12 in vaddr and keep the low 12 bits in the
    // immediate offset field.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Select a scratch access whose entire address is a legal MUBUF immediate
// offset, so no vaddr is needed.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the pure-offset MUBUF form (no vaddr/idxen/addr64), building a
// default resource descriptor around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  // Only usable when none of the addressing enables were selected.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Convenience overload discarding GLC/SLC/TFE.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Convenience overload exposing only SLC in addition to the address outputs.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a legal immediate offset plus an
// SOffset remainder, keeping both components 4-byte aligned for atomics.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows fit an inline constant; larger ones need an s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Select a buffer-intrinsic offset operand that must be a plain constant.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Select a buffer-intrinsic offset operand, producing a VGPR offset in
// addition to SOffset/ImmOffset when the offset is not constant.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Fallback: the whole expression becomes the VGPR offset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Match a flat address with an optional folded immediate offset (13-bit
// signed or 12-bit unsigned depending on IsSigned) on subtargets that
// support flat instruction offsets. Always succeeds.
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

// Flat atomic addressing: unsigned offset form.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

// Flat atomic addressing: signed offset form.
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool
AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                     SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  // Prefer the instruction's immediate offset field when legal.
  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Otherwise materialize the byte offset in an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Match an SMRD address as (SGPR base + offset); Imm reports whether the
// offset is an immediate. Always succeeds (default is offset 0).
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an encoded immediate offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a 32-bit literal offset; this encoding only exists on Sea
// Islands.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with the offset held in an SGPR (neither immediate nor literal).
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Buffer variant: offset only, must encode as an immediate.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

// Buffer variant of the 32-bit literal offset (Sea Islands only).
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// Split an index expression into (base, constant offset) for indirect
// register indexing (MOVREL). Fails for a bare constant index.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // An arithmetic shift right extracts a signed field, a logical shift
      // an unsigned one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

// Pattern-match shift/mask/sext_inreg combinations into S_BFE bitfield
// extracts; falls back to normal selection when no pattern applies.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Returns true if this BRCOND's condition is a single-use setcc that can be
// evaluated with a scalar compare (branch on SCC).
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  // Look through a CopyToReg of the condition.
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    // 64-bit scalar compares only exist for equality, and only on subtargets
    // that support them.
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Select a conditional branch, copying the condition to SCC for uniform
// scalar branches, or to VCC otherwise.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// Select f32 fmad as v_mad_mix_f32 when at least one operand is converted
// from f16; otherwise fall back to normal selection.
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    // Prefer the addr64 form when the subtarget has it.
    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ?
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand to the new machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The RTN instruction defines a wide register tuple; callers expect only
  // the old-value sub-register (sub0 for i32, sub0_sub1 for i64).
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Strip fneg/fabs wrappers from In, accumulating the corresponding VOP3
// source modifier bits in Mods. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

// As SelectVOP3ModsImpl, but materializes the modifier mask as a target
// constant operand.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

// As SelectVOP3Mods, but additionally requires the stripped source to be
// known not to be a NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources without any fneg/fabs wrapper.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// Source modifiers plus clamp/omod outputs, both defaulted to off (i1).
bool
AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                    SDValue &SrcMods, SDValue &Clamp,
                                    SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

// Variant producing i32 clamp/omod constants instead of i1.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Plain source with clamp/omod outputs defaulted to off; no modifier
// stripping. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

// Look through a bitcast to its source operand.
static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword,
// i.e. (trunc (srl x, 16)); on success Out is the (bitcast-stripped) dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
1880 static SDValue stripExtractLoElt(SDValue In) { 1881 if (In.getOpcode() == ISD::TRUNCATE) { 1882 SDValue Src = In.getOperand(0); 1883 if (Src.getValueType().getSizeInBits() == 32) 1884 return stripBitcast(Src); 1885 } 1886 1887 return In; 1888 } 1889 1890 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, 1891 SDValue &SrcMods) const { 1892 unsigned Mods = 0; 1893 Src = In; 1894 1895 if (Src.getOpcode() == ISD::FNEG) { 1896 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); 1897 Src = Src.getOperand(0); 1898 } 1899 1900 if (Src.getOpcode() == ISD::BUILD_VECTOR) { 1901 unsigned VecMods = Mods; 1902 1903 SDValue Lo = stripBitcast(Src.getOperand(0)); 1904 SDValue Hi = stripBitcast(Src.getOperand(1)); 1905 1906 if (Lo.getOpcode() == ISD::FNEG) { 1907 Lo = stripBitcast(Lo.getOperand(0)); 1908 Mods ^= SISrcMods::NEG; 1909 } 1910 1911 if (Hi.getOpcode() == ISD::FNEG) { 1912 Hi = stripBitcast(Hi.getOperand(0)); 1913 Mods ^= SISrcMods::NEG_HI; 1914 } 1915 1916 if (isExtractHiElt(Lo, Lo)) 1917 Mods |= SISrcMods::OP_SEL_0; 1918 1919 if (isExtractHiElt(Hi, Hi)) 1920 Mods |= SISrcMods::OP_SEL_1; 1921 1922 Lo = stripExtractLoElt(Lo); 1923 Hi = stripExtractLoElt(Hi); 1924 1925 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { 1926 // Really a scalar input. Just select from the low half of the register to 1927 // avoid packing. 1928 1929 Src = Lo; 1930 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1931 return true; 1932 } 1933 1934 Mods = VecMods; 1935 } 1936 1937 // Packed instructions do not have abs modifiers. 
1938 Mods |= SISrcMods::OP_SEL_1; 1939 1940 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1941 return true; 1942 } 1943 1944 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, 1945 SDValue &SrcMods, 1946 SDValue &Clamp) const { 1947 SDLoc SL(In); 1948 1949 // FIXME: Handle clamp and op_sel 1950 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1951 1952 return SelectVOP3PMods(In, Src, SrcMods); 1953 } 1954 1955 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, 1956 SDValue &SrcMods) const { 1957 Src = In; 1958 // FIXME: Handle op_sel 1959 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1960 return true; 1961 } 1962 1963 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src, 1964 SDValue &SrcMods, 1965 SDValue &Clamp) const { 1966 SDLoc SL(In); 1967 1968 // FIXME: Handle clamp 1969 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1970 1971 return SelectVOP3OpSel(In, Src, SrcMods); 1972 } 1973 1974 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, 1975 SDValue &SrcMods) const { 1976 // FIXME: Handle op_sel 1977 return SelectVOP3Mods(In, Src, SrcMods); 1978 } 1979 1980 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, 1981 SDValue &SrcMods, 1982 SDValue &Clamp) const { 1983 SDLoc SL(In); 1984 1985 // FIXME: Handle clamp 1986 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1987 1988 return SelectVOP3OpSelMods(In, Src, SrcMods); 1989 } 1990 1991 // The return value is not whether the match is possible (which it always is), 1992 // but whether or not it a conversion is really used. 
1993 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, 1994 unsigned &Mods) const { 1995 Mods = 0; 1996 SelectVOP3ModsImpl(In, Src, Mods); 1997 1998 if (Src.getOpcode() == ISD::FP_EXTEND) { 1999 Src = Src.getOperand(0); 2000 assert(Src.getValueType() == MVT::f16); 2001 Src = stripBitcast(Src); 2002 2003 // Be careful about folding modifiers if we already have an abs. fneg is 2004 // applied last, so we don't want to apply an earlier fneg. 2005 if ((Mods & SISrcMods::ABS) == 0) { 2006 unsigned ModsTmp; 2007 SelectVOP3ModsImpl(Src, Src, ModsTmp); 2008 2009 if ((ModsTmp & SISrcMods::NEG) != 0) 2010 Mods ^= SISrcMods::NEG; 2011 2012 if ((ModsTmp & SISrcMods::ABS) != 0) 2013 Mods |= SISrcMods::ABS; 2014 } 2015 2016 // op_sel/op_sel_hi decide the source type and source. 2017 // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. 2018 // If the sources's op_sel is set, it picks the high half of the source 2019 // register. 2020 2021 Mods |= SISrcMods::OP_SEL_1; 2022 if (isExtractHiElt(Src, Src)) { 2023 Mods |= SISrcMods::OP_SEL_0; 2024 2025 // TODO: Should we try to look for neg/abs here? 2026 } 2027 2028 return true; 2029 } 2030 2031 return false; 2032 } 2033 2034 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, 2035 SDValue &SrcMods) const { 2036 unsigned Mods = 0; 2037 SelectVOP3PMadMixModsImpl(In, Src, Mods); 2038 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 2039 return true; 2040 } 2041 2042 // TODO: Can we identify things like v_mad_mixhi_f16? 
2043 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const { 2044 if (In.isUndef()) { 2045 Src = In; 2046 return true; 2047 } 2048 2049 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) { 2050 SDLoc SL(In); 2051 SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32); 2052 MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 2053 SL, MVT::i32, K); 2054 Src = SDValue(MovK, 0); 2055 return true; 2056 } 2057 2058 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) { 2059 SDLoc SL(In); 2060 SDValue K = CurDAG->getTargetConstant( 2061 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); 2062 MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 2063 SL, MVT::i32, K); 2064 Src = SDValue(MovK, 0); 2065 return true; 2066 } 2067 2068 return isExtractHiElt(In, Src); 2069 } 2070 2071 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 2072 const AMDGPUTargetLowering& Lowering = 2073 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 2074 bool IsModified = false; 2075 do { 2076 IsModified = false; 2077 // Go over all selected nodes and try to fold them a bit more 2078 for (SDNode &Node : CurDAG->allnodes()) { 2079 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 2080 if (!MachineNode) 2081 continue; 2082 2083 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 2084 if (ResNode != &Node) { 2085 ReplaceUses(&Node, ResNode); 2086 IsModified = true; 2087 } 2088 } 2089 CurDAG->RemoveDeadNodes(); 2090 } while (IsModified); 2091 } 2092 2093 void R600DAGToDAGISel::Select(SDNode *N) { 2094 unsigned int Opc = N->getOpcode(); 2095 if (N->isMachineOpcode()) { 2096 N->setNodeId(-1); 2097 return; // Already selected. 
2098 } 2099 2100 switch (Opc) { 2101 default: break; 2102 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 2103 case ISD::SCALAR_TO_VECTOR: 2104 case ISD::BUILD_VECTOR: { 2105 EVT VT = N->getValueType(0); 2106 unsigned NumVectorElts = VT.getVectorNumElements(); 2107 unsigned RegClassID; 2108 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 2109 // that adds a 128 bits reg copy when going through TwoAddressInstructions 2110 // pass. We want to avoid 128 bits copies as much as possible because they 2111 // can't be bundled by our scheduler. 2112 switch(NumVectorElts) { 2113 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 2114 case 4: 2115 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 2116 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 2117 else 2118 RegClassID = AMDGPU::R600_Reg128RegClassID; 2119 break; 2120 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 2121 } 2122 SelectBuildVector(N, RegClassID); 2123 return; 2124 } 2125 } 2126 2127 SelectCode(N); 2128 } 2129 2130 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 2131 SDValue &Offset) { 2132 ConstantSDNode *C; 2133 SDLoc DL(Addr); 2134 2135 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 2136 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 2137 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2138 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && 2139 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { 2140 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 2141 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2142 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 2143 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 2144 Base = Addr.getOperand(0); 2145 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2146 } else { 2147 Base = Addr; 2148 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 2149 } 2150 2151 return true; 
2152 } 2153 2154 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 2155 SDValue &Offset) { 2156 ConstantSDNode *IMMOffset; 2157 2158 if (Addr.getOpcode() == ISD::ADD 2159 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 2160 && isInt<16>(IMMOffset->getZExtValue())) { 2161 2162 Base = Addr.getOperand(0); 2163 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 2164 MVT::i32); 2165 return true; 2166 // If the pointer address is constant, we can move it to the offset field. 2167 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 2168 && isInt<16>(IMMOffset->getZExtValue())) { 2169 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 2170 SDLoc(CurDAG->getEntryNode()), 2171 AMDGPU::ZERO, MVT::i32); 2172 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 2173 MVT::i32); 2174 return true; 2175 } 2176 2177 // Default case, no offset 2178 Base = Addr; 2179 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 2180 return true; 2181 } 2182