//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}
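
// Example of the encoding rule checked above: on GCN the integers -16..64
// and a small set of FP values (0.5, 1.0, 2.0, 4.0 and their negations) are
// "inline" constants that cost no extra instruction dword, so
// s_mov_b32 s0, 64 encodes inline while s_mov_b32 s0, 0x12345678 needs a
// 32-bit literal.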

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}
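
// SelectBuildVector lowers build_vector/scalar_to_vector into a
// REG_SEQUENCE. For example, (build_vector x, y, z, w) becomes roughly:
//   REG_SEQUENCE SReg_128RegClassID, x, sub0, y, sub1, z, sub2, w, sub3
// i.e. the register class ID followed by (value, subreg-index) pairs.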
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] =
        CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                  MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }
  }

  if (!IsRegSeq) {
    SelectCode(N);
    return;
  }
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
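  // For example, an i64 ADDC/ADDE pair becomes an S_ADD_U32 of the low
  // halves that defines SCC, an S_ADDC_U32 of the high halves that consumes
  // it, and a REG_SEQUENCE recombining the two 32-bit results (see
  // SelectADD_SUB_I64 below).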
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

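  // Example for the BFE cases below: (AMDGPUISD::BFE_U32 %x, 8, 16) with
  // constant operands selects to S_BFE_U32 %x, 0x100008, packing offset = 8
  // into bits [5:0] and width = 16 into bits [22:16] of the second source.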
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
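
// SelectADDRIndirect splits an address into base + 32-bit constant offset,
// using the INDIRECT_BASE_ADDR pseudo-register as the base when the whole
// address (or a DWORDADDR of a constant) is itself constant.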
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
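
// UADDO/USUBO produce an i32 result plus an i1 carry; they map directly onto
// v_add_i32_e64 / v_sub_i32_e64, whose second result is the carry-out, so no
// REG_SEQUENCE or glue is needed here.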
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
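
// DS (LDS) instructions take a single VGPR address plus an unsigned 16-bit
// byte offset, so e.g. a load of (add %ptr, 40) can select to
//   ds_read_b32 %dst, %ptr offset:40
// leaving the add entirely to the offset field.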
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
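
// For ds_read2/ds_write2 the two 8-bit offsets below are encoded in units of
// the element size (dwords here), so a byte offset of 40 becomes offset0:10
// and the adjacent dword is offset1:11.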
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
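
// MUBUF addressing computes rsrc.base + vaddr + soffset + imm offset, with
// the immediate limited to 12 bits. SelectMUBUF distributes a pointer
// expression across those fields, preferring the immediate when it fits.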
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
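
// Within a call sequence, stores to the outgoing argument area are relative
// to the stack pointer SGPR; other private accesses are relative to the
// scratch wave offset SGPR (see foldFrameIndex below).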
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}
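
// Scratch (private) accesses use the MUBUF "offen" form: a per-lane VGPR
// offset plus the wave's scratch resource descriptor, an SOffset SGPR, and
// a 12-bit immediate.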
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
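
// SelectMUBUFConstant splits a constant buffer offset into the 12-bit
// immediate field plus an SOffset remainder. For example, 4100 does not fit
// in 12 bits and becomes ImmOffset = 4092 with an inline SOffset constant
// of 8.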
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}
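
// For the buffer intrinsics, a non-constant offset is placed in VOffset,
// while any constant part that fits is still folded into SOffset/ImmOffset.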
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
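
// SMEM loads take a 64-bit SGPR base. Expand32BitAddress widens a 32-bit
// constant-address pointer into a 64-bit REG_SEQUENCE whose high half comes
// from SIMachineFunctionInfo::get32BitAddressHighBits().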
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}
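
// getS_BFE packs the (offset, width) pair into the single src1 operand of
// s_bfe_{i,u}32. Example: extracting 8 bits starting at bit 16 encodes as
// 16 | (8 << 16) = 0x80010.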
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "(a << b) sra c" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
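
// A BRCOND can use S_CBRANCH_SCC1 only when its condition is a uniform,
// single-use scalar compare; 64-bit compares additionally require
// s_cmp_eq_u64/s_cmp_lg_u64 support (hasScalarCompareEq64).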
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets changed to
    // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU, which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND
    // removal, so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                       CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                       Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an
  // operand using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
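    // The zero immediates below fill the instruction's remaining modifier
    // operands; the per-source op_sel state was already folded into
    // Src0Mods/Src1Mods/Src2Mods above.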
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
                               AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
                               AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

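  // The _RTN atomic writes its result into the wider vdata register pair;
  // extract the sub-register that holds the original memory value.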
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16 bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16 bits of the
// same register.
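// A truncate of a 32-bit value only reads the low half, so we can peek
// through it (and any bitcast) at the original 32-bit source register.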
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register
      // to avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
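// In other words, a true result means the operand really is an f16 value
// extended to f32, which is what makes the mix instructions worthwhile.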
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL,
                                          MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

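  // Vector build nodes are selected by hand into the appropriate R600
  // register class (see the comment in the BUILD_VECTOR case below);
  // everything else falls through to the generated matcher.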
  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREGs, which
    // adds a 128-bit register copy when going through the
    // TwoAddressInstruction pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    switch (NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}