//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
///
/// Nodes that need custom handling are dispatched from Select(); everything
/// else is handed to the matcher pulled in from AMDGPUGenDAGISel.inc at the
/// bottom of the class.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // It is (re)assigned at the start of every runOnMachineFunction() call.
  const GCNSubtarget *Subtarget;
  // Address-space numbering, obtained from the TargetMachine in the
  // constructor.
  AMDGPUAS AMDGPUASI;
  // Snapshot of AMDGPUTargetMachine::EnableLateStructurizeCFG taken at
  // construction time.
  bool EnableLateStructurizeCFG;

public:
  /// Build the selector for \p TM at optimization level \p OptLevel.
  ///
  /// NOTE: \p TM is dereferenced unconditionally (both in the base-class
  /// initializer and in the body), so callers must pass a non-null
  /// TargetMachine even though the parameter defaults to nullptr.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  /// Declare the analyses this pass requires in addition to those required by
  /// SelectionDAGISel itself.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  /// Select a BUILD_VECTOR / SCALAR_TO_VECTOR node into a register of class
  /// \p RegClassID. Protected so that derived selectors can call it.
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Address selectors that the derived R600 selector overrides.
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS addressing selectors.
  // NOTE(review): the Select* members below are presumably referenced as
  // ComplexPatterns by the generated matcher included at the end of the
  // class -- confirm against the .td files.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF addressing selectors.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing selectors.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD addressing selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source / modifier selectors.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  // Hand-written selection routines, called from Select() for specific
  // opcodes.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

/// Instruction selector for R600 targets, layered on top of
/// AMDGPUDAGToDAGISel.
///
/// It declares its own Subtarget and AMDGPUASI members; these hide the
/// private members of the same name in the base class, so the two classes do
/// not share that state.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  /// \p TM is dereferenced, so it must be non-null.
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;
protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

// Pass registration: the selector is registered under the name "isel" and
// lists the same three analyses that getAnalysisUsage() requires.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
///
/// The returned pass is heap-allocated with `new`; the caller takes ownership.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
273 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, 274 CodeGenOpt::Level OptLevel) { 275 return new R600DAGToDAGISel(TM, OptLevel); 276 } 277 278 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 279 Subtarget = &MF.getSubtarget<GCNSubtarget>(); 280 return SelectionDAGISel::runOnMachineFunction(MF); 281 } 282 283 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { 284 if (TM.Options.NoNaNsFPMath) 285 return true; 286 287 // TODO: Move into isKnownNeverNaN 288 if (N->getFlags().isDefined()) 289 return N->getFlags().hasNoNaNs(); 290 291 return CurDAG->isKnownNeverNaN(N); 292 } 293 294 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 295 const SIInstrInfo *TII = Subtarget->getInstrInfo(); 296 297 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 298 return TII->isInlineConstant(C->getAPIntValue()); 299 300 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 301 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 302 303 return false; 304 } 305 306 /// Determine the register class for \p OpNo 307 /// \returns The register class of the virtual register that will be used for 308 /// the given operand number \OpNo or NULL if the register class cannot be 309 /// determined. 
310 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 311 unsigned OpNo) const { 312 if (!N->isMachineOpcode()) { 313 if (N->getOpcode() == ISD::CopyToReg) { 314 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 315 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 316 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 317 return MRI.getRegClass(Reg); 318 } 319 320 const SIRegisterInfo *TRI 321 = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); 322 return TRI->getPhysRegClass(Reg); 323 } 324 325 return nullptr; 326 } 327 328 switch (N->getMachineOpcode()) { 329 default: { 330 const MCInstrDesc &Desc = 331 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 332 unsigned OpIdx = Desc.getNumDefs() + OpNo; 333 if (OpIdx >= Desc.getNumOperands()) 334 return nullptr; 335 int RegClass = Desc.OpInfo[OpIdx].RegClass; 336 if (RegClass == -1) 337 return nullptr; 338 339 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 340 } 341 case AMDGPU::REG_SEQUENCE: { 342 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 343 const TargetRegisterClass *SuperRC = 344 Subtarget->getRegisterInfo()->getRegClass(RCID); 345 346 SDValue SubRegOp = N->getOperand(OpNo + 1); 347 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 348 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 349 SubRegIdx); 350 } 351 } 352 } 353 354 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 355 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || 356 !Subtarget->ldsRequiresM0Init()) 357 return N; 358 359 const SITargetLowering& Lowering = 360 *static_cast<const SITargetLowering*>(getTargetLowering()); 361 362 // Write max value to m0 before each load operation 363 364 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 365 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 366 367 SDValue Glue = M0.getValue(1); 368 369 
SmallVector <SDValue, 8> Ops; 370 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 371 Ops.push_back(N->getOperand(i)); 372 } 373 Ops.push_back(Glue); 374 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 375 } 376 377 MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, 378 EVT VT) const { 379 SDNode *Lo = CurDAG->getMachineNode( 380 AMDGPU::S_MOV_B32, DL, MVT::i32, 381 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); 382 SDNode *Hi = 383 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 384 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 385 const SDValue Ops[] = { 386 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 387 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 388 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; 389 390 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); 391 } 392 393 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 394 switch (NumVectorElts) { 395 case 1: 396 return AMDGPU::SReg_32_XM0RegClassID; 397 case 2: 398 return AMDGPU::SReg_64RegClassID; 399 case 4: 400 return AMDGPU::SReg_128RegClassID; 401 case 8: 402 return AMDGPU::SReg_256RegClassID; 403 case 16: 404 return AMDGPU::SReg_512RegClassID; 405 } 406 407 llvm_unreachable("invalid vector size"); 408 } 409 410 static bool getConstantValue(SDValue N, uint32_t &Out) { 411 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 412 Out = C->getAPIntValue().getZExtValue(); 413 return true; 414 } 415 416 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 417 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 418 return true; 419 } 420 421 return false; 422 } 423 424 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 425 EVT VT = N->getValueType(0); 426 unsigned NumVectorElts = VT.getVectorNumElements(); 427 EVT EltVT = VT.getVectorElementType(); 428 SDLoc DL(N); 429 SDValue 
RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 430 431 if (NumVectorElts == 1) { 432 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 433 RegClass); 434 return; 435 } 436 437 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 438 "supported yet"); 439 // 16 = Max Num Vector Elements 440 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 441 // 1 = Vector Register Class 442 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 443 444 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 445 bool IsRegSeq = true; 446 unsigned NOps = N->getNumOperands(); 447 for (unsigned i = 0; i < NOps; i++) { 448 // XXX: Why is this here? 449 if (isa<RegisterSDNode>(N->getOperand(i))) { 450 IsRegSeq = false; 451 break; 452 } 453 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); 454 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 455 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); 456 } 457 if (NOps != NumVectorElts) { 458 // Fill in the missing undef elements if this was a scalar_to_vector. 459 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 460 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 461 DL, EltVT); 462 for (unsigned i = NOps; i < NumVectorElts; ++i) { 463 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); 464 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 465 RegSeqArgs[1 + (2 * i) + 1] = 466 CurDAG->getTargetConstant(Sub, DL, MVT::i32); 467 } 468 } 469 470 if (!IsRegSeq) 471 SelectCode(N); 472 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 473 } 474 475 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 476 unsigned int Opc = N->getOpcode(); 477 if (N->isMachineOpcode()) { 478 N->setNodeId(-1); 479 return; // Already selected. 
480 } 481 482 if (isa<AtomicSDNode>(N) || 483 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || 484 Opc == AMDGPUISD::ATOMIC_LOAD_FADD || 485 Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || 486 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) 487 N = glueCopyToM0(N); 488 489 switch (Opc) { 490 default: 491 break; 492 // We are selecting i64 ADD here instead of custom lower it during 493 // DAG legalization, so we can fold some i64 ADDs used for address 494 // calculation into the LOAD and STORE instructions. 495 case ISD::ADDC: 496 case ISD::ADDE: 497 case ISD::SUBC: 498 case ISD::SUBE: { 499 if (N->getValueType(0) != MVT::i64) 500 break; 501 502 SelectADD_SUB_I64(N); 503 return; 504 } 505 case ISD::UADDO: 506 case ISD::USUBO: { 507 SelectUADDO_USUBO(N); 508 return; 509 } 510 case AMDGPUISD::FMUL_W_CHAIN: { 511 SelectFMUL_W_CHAIN(N); 512 return; 513 } 514 case AMDGPUISD::FMA_W_CHAIN: { 515 SelectFMA_W_CHAIN(N); 516 return; 517 } 518 519 case ISD::SCALAR_TO_VECTOR: 520 case ISD::BUILD_VECTOR: { 521 EVT VT = N->getValueType(0); 522 unsigned NumVectorElts = VT.getVectorNumElements(); 523 if (VT.getScalarSizeInBits() == 16) { 524 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) { 525 uint32_t LHSVal, RHSVal; 526 if (getConstantValue(N->getOperand(0), LHSVal) && 527 getConstantValue(N->getOperand(1), RHSVal)) { 528 uint32_t K = LHSVal | (RHSVal << 16); 529 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 530 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 531 return; 532 } 533 } 534 535 break; 536 } 537 538 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 539 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 540 SelectBuildVector(N, RegClassID); 541 return; 542 } 543 case ISD::BUILD_PAIR: { 544 SDValue RC, SubReg0, SubReg1; 545 SDLoc DL(N); 546 if (N->getValueType(0) == MVT::i128) { 547 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 548 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 549 SubReg1 = 
CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 550 } else if (N->getValueType(0) == MVT::i64) { 551 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 552 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 553 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 554 } else { 555 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 556 } 557 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 558 N->getOperand(1), SubReg1 }; 559 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 560 N->getValueType(0), Ops)); 561 return; 562 } 563 564 case ISD::Constant: 565 case ISD::ConstantFP: { 566 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 567 break; 568 569 uint64_t Imm; 570 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 571 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 572 else { 573 ConstantSDNode *C = cast<ConstantSDNode>(N); 574 Imm = C->getZExtValue(); 575 } 576 577 SDLoc DL(N); 578 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); 579 return; 580 } 581 case ISD::LOAD: 582 case ISD::STORE: 583 case ISD::ATOMIC_LOAD: 584 case ISD::ATOMIC_STORE: { 585 N = glueCopyToM0(N); 586 break; 587 } 588 589 case AMDGPUISD::BFE_I32: 590 case AMDGPUISD::BFE_U32: { 591 // There is a scalar version available, but unlike the vector version which 592 // has a separate operand for the offset and width, the scalar version packs 593 // the width and offset into a single operand. Try to move to the scalar 594 // version if the offsets are constant, so that we can try to keep extended 595 // loads of kernel arguments in SGPRs. 596 597 // TODO: Technically we could try to pattern match scalar bitshifts of 598 // dynamic values, but it's probably not useful. 
599 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 600 if (!Offset) 601 break; 602 603 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 604 if (!Width) 605 break; 606 607 bool Signed = Opc == AMDGPUISD::BFE_I32; 608 609 uint32_t OffsetVal = Offset->getZExtValue(); 610 uint32_t WidthVal = Width->getZExtValue(); 611 612 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 613 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 614 return; 615 } 616 case AMDGPUISD::DIV_SCALE: { 617 SelectDIV_SCALE(N); 618 return; 619 } 620 case AMDGPUISD::MAD_I64_I32: 621 case AMDGPUISD::MAD_U64_U32: { 622 SelectMAD_64_32(N); 623 return; 624 } 625 case ISD::CopyToReg: { 626 const SITargetLowering& Lowering = 627 *static_cast<const SITargetLowering*>(getTargetLowering()); 628 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 629 break; 630 } 631 case ISD::AND: 632 case ISD::SRL: 633 case ISD::SRA: 634 case ISD::SIGN_EXTEND_INREG: 635 if (N->getValueType(0) != MVT::i32) 636 break; 637 638 SelectS_BFE(N); 639 return; 640 case ISD::BRCOND: 641 SelectBRCOND(N); 642 return; 643 case ISD::FMAD: 644 case ISD::FMA: 645 SelectFMAD_FMA(N); 646 return; 647 case AMDGPUISD::ATOMIC_CMP_SWAP: 648 SelectATOMIC_CMP_SWAP(N); 649 return; 650 case AMDGPUISD::CVT_PKRTZ_F16_F32: 651 case AMDGPUISD::CVT_PKNORM_I16_F32: 652 case AMDGPUISD::CVT_PKNORM_U16_F32: 653 case AMDGPUISD::CVT_PK_U16_U32: 654 case AMDGPUISD::CVT_PK_I16_I32: { 655 // Hack around using a legal type if f16 is illegal. 656 if (N->getValueType(0) == MVT::i32) { 657 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? 
MVT::v2f16 : MVT::v2i16; 658 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT), 659 { N->getOperand(0), N->getOperand(1) }); 660 SelectCode(N); 661 return; 662 } 663 } 664 } 665 666 SelectCode(N); 667 } 668 669 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 670 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 671 const Instruction *Term = BB->getTerminator(); 672 return Term->getMetadata("amdgpu.uniform") || 673 Term->getMetadata("structurizecfg.uniform"); 674 } 675 676 StringRef AMDGPUDAGToDAGISel::getPassName() const { 677 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 678 } 679 680 //===----------------------------------------------------------------------===// 681 // Complex Patterns 682 //===----------------------------------------------------------------------===// 683 684 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 685 SDValue &Offset) { 686 return false; 687 } 688 689 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 690 SDValue &Offset) { 691 ConstantSDNode *C; 692 SDLoc DL(Addr); 693 694 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 695 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); 696 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 697 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && 698 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { 699 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); 700 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 701 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 702 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 703 Base = Addr.getOperand(0); 704 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 705 } else { 706 Base = Addr; 707 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 708 } 709 710 return true; 711 } 712 713 // FIXME: Should only handle addcarry/subcarry 714 void 
AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 715 SDLoc DL(N); 716 SDValue LHS = N->getOperand(0); 717 SDValue RHS = N->getOperand(1); 718 719 unsigned Opcode = N->getOpcode(); 720 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); 721 bool ProduceCarry = 722 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; 723 bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; 724 725 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 726 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 727 728 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 729 DL, MVT::i32, LHS, Sub0); 730 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 731 DL, MVT::i32, LHS, Sub1); 732 733 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 734 DL, MVT::i32, RHS, Sub0); 735 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 736 DL, MVT::i32, RHS, Sub1); 737 738 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 739 740 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 741 unsigned CarryOpc = IsAdd ? 
AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 742 743 SDNode *AddLo; 744 if (!ConsumeCarry) { 745 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 746 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); 747 } else { 748 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; 749 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); 750 } 751 SDValue AddHiArgs[] = { 752 SDValue(Hi0, 0), 753 SDValue(Hi1, 0), 754 SDValue(AddLo, 1) 755 }; 756 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); 757 758 SDValue RegSequenceArgs[] = { 759 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 760 SDValue(AddLo,0), 761 Sub0, 762 SDValue(AddHi,0), 763 Sub1, 764 }; 765 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, 766 MVT::i64, RegSequenceArgs); 767 768 if (ProduceCarry) { 769 // Replace the carry-use 770 ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); 771 } 772 773 // Replace the remaining uses. 774 ReplaceNode(N, RegSequence); 775 } 776 777 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { 778 // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned 779 // carry out despite the _i32 name. These were renamed in VI to _U32. 780 // FIXME: We should probably rename the opcodes here. 781 unsigned Opc = N->getOpcode() == ISD::UADDO ? 
782 AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; 783 784 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), 785 { N->getOperand(0), N->getOperand(1) }); 786 } 787 788 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { 789 SDLoc SL(N); 790 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod 791 SDValue Ops[10]; 792 793 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); 794 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); 795 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); 796 Ops[8] = N->getOperand(0); 797 Ops[9] = N->getOperand(4); 798 799 CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops); 800 } 801 802 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { 803 SDLoc SL(N); 804 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod 805 SDValue Ops[8]; 806 807 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); 808 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); 809 Ops[6] = N->getOperand(0); 810 Ops[7] = N->getOperand(3); 811 812 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); 813 } 814 815 // We need to handle this here because tablegen doesn't support matching 816 // instructions with multiple outputs. 817 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 818 SDLoc SL(N); 819 EVT VT = N->getValueType(0); 820 821 assert(VT == MVT::f32 || VT == MVT::f64); 822 823 unsigned Opc 824 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 825 826 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; 827 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); 828 } 829 830 // We need to handle this here because tablegen doesn't support matching 831 // instructions with multiple outputs. 832 void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { 833 SDLoc SL(N); 834 bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; 835 unsigned Opc = Signed ? 
AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32; 836 837 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); 838 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 839 Clamp }; 840 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); 841 } 842 843 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 844 unsigned OffsetBits) const { 845 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 846 (OffsetBits == 8 && !isUInt<8>(Offset))) 847 return false; 848 849 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 850 Subtarget->unsafeDSOffsetFoldingEnabled()) 851 return true; 852 853 // On Southern Islands instruction with a negative base value and an offset 854 // don't seem to work. 855 return CurDAG->SignBitIsZero(Base); 856 } 857 858 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 859 SDValue &Offset) const { 860 SDLoc DL(Addr); 861 if (CurDAG->isBaseWithConstantOffset(Addr)) { 862 SDValue N0 = Addr.getOperand(0); 863 SDValue N1 = Addr.getOperand(1); 864 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 865 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 866 // (add n0, c0) 867 Base = N0; 868 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 869 return true; 870 } 871 } else if (Addr.getOpcode() == ISD::SUB) { 872 // sub C, x -> add (sub 0, x), C 873 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 874 int64_t ByteOffset = C->getSExtValue(); 875 if (isUInt<16>(ByteOffset)) { 876 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 877 878 // XXX - This is kind of hacky. Create a dummy sub node so we can check 879 // the known bits in isDSOffsetLegal. We need to emit the selected node 880 // here, so this is thrown away. 881 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 882 Zero, Addr.getOperand(1)); 883 884 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 885 // FIXME: Select to VOP3 version for with-carry. 
886 unsigned SubOp = Subtarget->hasAddNoCarry() ? 887 AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; 888 889 MachineSDNode *MachineSub 890 = CurDAG->getMachineNode(SubOp, DL, MVT::i32, 891 Zero, Addr.getOperand(1)); 892 893 Base = SDValue(MachineSub, 0); 894 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); 895 return true; 896 } 897 } 898 } 899 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 900 // If we have a constant address, prefer to put the constant into the 901 // offset. This can save moves to load the constant address since multiple 902 // operations can share the zero base address register, and enables merging 903 // into read2 / write2 instructions. 904 905 SDLoc DL(Addr); 906 907 if (isUInt<16>(CAddr->getZExtValue())) { 908 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 909 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 910 DL, MVT::i32, Zero); 911 Base = SDValue(MovZero, 0); 912 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 913 return true; 914 } 915 } 916 917 // default case 918 Base = Addr; 919 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 920 return true; 921 } 922 923 // TODO: If offset is too big, put low 16-bit into offset. 
924 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 925 SDValue &Offset0, 926 SDValue &Offset1) const { 927 SDLoc DL(Addr); 928 929 if (CurDAG->isBaseWithConstantOffset(Addr)) { 930 SDValue N0 = Addr.getOperand(0); 931 SDValue N1 = Addr.getOperand(1); 932 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 933 unsigned DWordOffset0 = C1->getZExtValue() / 4; 934 unsigned DWordOffset1 = DWordOffset0 + 1; 935 // (add n0, c0) 936 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 937 Base = N0; 938 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 939 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 940 return true; 941 } 942 } else if (Addr.getOpcode() == ISD::SUB) { 943 // sub C, x -> add (sub 0, x), C 944 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 945 unsigned DWordOffset0 = C->getZExtValue() / 4; 946 unsigned DWordOffset1 = DWordOffset0 + 1; 947 948 if (isUInt<8>(DWordOffset0)) { 949 SDLoc DL(Addr); 950 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 951 952 // XXX - This is kind of hacky. Create a dummy sub node so we can check 953 // the known bits in isDSOffsetLegal. We need to emit the selected node 954 // here, so this is thrown away. 955 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 956 Zero, Addr.getOperand(1)); 957 958 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 959 unsigned SubOp = Subtarget->hasAddNoCarry() ? 
960 AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32; 961 962 MachineSDNode *MachineSub 963 = CurDAG->getMachineNode(SubOp, DL, MVT::i32, 964 Zero, Addr.getOperand(1)); 965 966 Base = SDValue(MachineSub, 0); 967 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 968 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 969 return true; 970 } 971 } 972 } 973 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 974 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 975 unsigned DWordOffset1 = DWordOffset0 + 1; 976 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 977 978 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 979 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 980 MachineSDNode *MovZero 981 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 982 DL, MVT::i32, Zero); 983 Base = SDValue(MovZero, 0); 984 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 985 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 986 return true; 987 } 988 } 989 990 // default case 991 992 // FIXME: This is broken on SI where we still need to check if the base 993 // pointer is positive here. 
994 Base = Addr; 995 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 996 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 997 return true; 998 } 999 1000 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 1001 SDValue &VAddr, SDValue &SOffset, 1002 SDValue &Offset, SDValue &Offen, 1003 SDValue &Idxen, SDValue &Addr64, 1004 SDValue &GLC, SDValue &SLC, 1005 SDValue &TFE) const { 1006 // Subtarget prefers to use flat instruction 1007 if (Subtarget->useFlatForGlobal()) 1008 return false; 1009 1010 SDLoc DL(Addr); 1011 1012 if (!GLC.getNode()) 1013 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 1014 if (!SLC.getNode()) 1015 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 1016 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 1017 1018 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 1019 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 1020 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 1021 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1022 1023 ConstantSDNode *C1 = nullptr; 1024 SDValue N0 = Addr; 1025 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1026 C1 = cast<ConstantSDNode>(Addr.getOperand(1)); 1027 if (isUInt<32>(C1->getZExtValue())) 1028 N0 = Addr.getOperand(0); 1029 else 1030 C1 = nullptr; 1031 } 1032 1033 if (N0.getOpcode() == ISD::ADD) { 1034 // (add N2, N3) -> addr64, or 1035 // (add (add N2, N3), C1) -> addr64 1036 SDValue N2 = N0.getOperand(0); 1037 SDValue N3 = N0.getOperand(1); 1038 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1039 1040 if (N2->isDivergent()) { 1041 if (N3->isDivergent()) { 1042 // Both N2 and N3 are divergent. Use N0 (the result of the add) as the 1043 // addr64, and construct the resource from a 0 address. 1044 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); 1045 VAddr = N0; 1046 } else { 1047 // N2 is divergent, N3 is not. 1048 Ptr = N3; 1049 VAddr = N2; 1050 } 1051 } else { 1052 // N2 is not divergent. 
1053 Ptr = N2; 1054 VAddr = N3; 1055 } 1056 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1057 } else if (N0->isDivergent()) { 1058 // N0 is divergent. Use it as the addr64, and construct the resource from a 1059 // 0 address. 1060 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); 1061 VAddr = N0; 1062 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1063 } else { 1064 // N0 -> offset, or 1065 // (N0 + C1) -> offset 1066 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1067 Ptr = N0; 1068 } 1069 1070 if (!C1) { 1071 // No offset. 1072 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1073 return true; 1074 } 1075 1076 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) { 1077 // Legal offset for instruction. 1078 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1079 return true; 1080 } 1081 1082 // Illegal offset, store it in soffset. 1083 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1084 SOffset = 1085 SDValue(CurDAG->getMachineNode( 1086 AMDGPU::S_MOV_B32, DL, MVT::i32, 1087 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 1088 0); 1089 return true; 1090 } 1091 1092 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1093 SDValue &VAddr, SDValue &SOffset, 1094 SDValue &Offset, SDValue &GLC, 1095 SDValue &SLC, SDValue &TFE) const { 1096 SDValue Ptr, Offen, Idxen, Addr64; 1097 1098 // addr64 bit was removed for volcanic islands. 
1099 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 1100 return false; 1101 1102 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1103 GLC, SLC, TFE)) 1104 return false; 1105 1106 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1107 if (C->getSExtValue()) { 1108 SDLoc DL(Addr); 1109 1110 const SITargetLowering& Lowering = 1111 *static_cast<const SITargetLowering*>(getTargetLowering()); 1112 1113 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1114 return true; 1115 } 1116 1117 return false; 1118 } 1119 1120 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1121 SDValue &VAddr, SDValue &SOffset, 1122 SDValue &Offset, 1123 SDValue &SLC) const { 1124 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1125 SDValue GLC, TFE; 1126 1127 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1128 } 1129 1130 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { 1131 auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>(); 1132 return PSV && PSV->isStack(); 1133 } 1134 1135 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { 1136 const MachineFunction &MF = CurDAG->getMachineFunction(); 1137 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1138 1139 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) { 1140 SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), 1141 FI->getValueType(0)); 1142 1143 // If we can resolve this to a frame index access, this is relative to the 1144 // frame pointer SGPR. 1145 return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(), 1146 MVT::i32)); 1147 } 1148 1149 // If we don't know this private access is a local stack object, it needs to 1150 // be relative to the entry point's scratch wave offset register. 
1151 return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(), 1152 MVT::i32)); 1153 } 1154 1155 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, 1156 SDValue Addr, SDValue &Rsrc, 1157 SDValue &VAddr, SDValue &SOffset, 1158 SDValue &ImmOffset) const { 1159 1160 SDLoc DL(Addr); 1161 MachineFunction &MF = CurDAG->getMachineFunction(); 1162 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1163 1164 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 1165 1166 if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 1167 unsigned Imm = CAddr->getZExtValue(); 1168 1169 SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); 1170 MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 1171 DL, MVT::i32, HighBits); 1172 VAddr = SDValue(MovHighBits, 0); 1173 1174 // In a call sequence, stores to the argument stack area are relative to the 1175 // stack pointer. 1176 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); 1177 unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? 1178 Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); 1179 1180 SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); 1181 ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); 1182 return true; 1183 } 1184 1185 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1186 // (add n0, c1) 1187 1188 SDValue N0 = Addr.getOperand(0); 1189 SDValue N1 = Addr.getOperand(1); 1190 1191 // Offsets in vaddr must be positive if range checking is enabled. 1192 // 1193 // The total computation of vaddr + soffset + offset must not overflow. If 1194 // vaddr is negative, even if offset is 0 the sgpr offset add will end up 1195 // overflowing. 1196 // 1197 // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would 1198 // always perform a range check. If a negative vaddr base index was used, 1199 // this would fail the range check. 
The overall address computation would 1200 // compute a valid address, but this doesn't happen due to the range 1201 // check. For out-of-bounds MUBUF loads, a 0 is returned. 1202 // 1203 // Therefore it should be safe to fold any VGPR offset on gfx9 into the 1204 // MUBUF vaddr, but not on older subtargets which can only do this if the 1205 // sign bit is known 0. 1206 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1207 if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) && 1208 (!Subtarget->privateMemoryResourceIsRangeChecked() || 1209 CurDAG->SignBitIsZero(N0))) { 1210 std::tie(VAddr, SOffset) = foldFrameIndex(N0); 1211 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1212 return true; 1213 } 1214 } 1215 1216 // (node) 1217 std::tie(VAddr, SOffset) = foldFrameIndex(Addr); 1218 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1219 return true; 1220 } 1221 1222 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, 1223 SDValue Addr, 1224 SDValue &SRsrc, 1225 SDValue &SOffset, 1226 SDValue &Offset) const { 1227 ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr); 1228 if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) 1229 return false; 1230 1231 SDLoc DL(Addr); 1232 MachineFunction &MF = CurDAG->getMachineFunction(); 1233 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1234 1235 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 1236 1237 const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); 1238 unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? 1239 Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); 1240 1241 // FIXME: Get from MachinePointerInfo? We should only be using the frame 1242 // offset if we know this is in a call sequence. 
1243 SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); 1244 1245 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 1246 return true; 1247 } 1248 1249 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1250 SDValue &SOffset, SDValue &Offset, 1251 SDValue &GLC, SDValue &SLC, 1252 SDValue &TFE) const { 1253 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1254 const SIInstrInfo *TII = 1255 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1256 1257 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1258 GLC, SLC, TFE)) 1259 return false; 1260 1261 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1262 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1263 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1264 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1265 APInt::getAllOnesValue(32).getZExtValue(); // Size 1266 SDLoc DL(Addr); 1267 1268 const SITargetLowering& Lowering = 1269 *static_cast<const SITargetLowering*>(getTargetLowering()); 1270 1271 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1272 return true; 1273 } 1274 return false; 1275 } 1276 1277 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1278 SDValue &Soffset, SDValue &Offset 1279 ) const { 1280 SDValue GLC, SLC, TFE; 1281 1282 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1283 } 1284 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1285 SDValue &Soffset, SDValue &Offset, 1286 SDValue &SLC) const { 1287 SDValue GLC, TFE; 1288 1289 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1290 } 1291 1292 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, 1293 SDValue &SOffset, 1294 SDValue &ImmOffset) const { 1295 SDLoc DL(Constant); 1296 const uint32_t Align = 4; 1297 const uint32_t MaxImm = alignDown(4095, Align); 1298 uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); 1299 uint32_t Overflow = 0; 
1300 1301 if (Imm > MaxImm) { 1302 if (Imm <= MaxImm + 64) { 1303 // Use an SOffset inline constant for 4..64 1304 Overflow = Imm - MaxImm; 1305 Imm = MaxImm; 1306 } else { 1307 // Try to keep the same value in SOffset for adjacent loads, so that 1308 // the corresponding register contents can be re-used. 1309 // 1310 // Load values with all low-bits (except for alignment bits) set into 1311 // SOffset, so that a larger range of values can be covered using 1312 // s_movk_i32. 1313 // 1314 // Atomic operations fail to work correctly when individual address 1315 // components are unaligned, even if their sum is aligned. 1316 uint32_t High = (Imm + Align) & ~4095; 1317 uint32_t Low = (Imm + Align) & 4095; 1318 Imm = Low; 1319 Overflow = High - Align; 1320 } 1321 } 1322 1323 // There is a hardware bug in SI and CI which prevents address clamping in 1324 // MUBUF instructions from working correctly with SOffsets. The immediate 1325 // offset is unaffected. 1326 if (Overflow > 0 && 1327 Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 1328 return false; 1329 1330 ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); 1331 1332 if (Overflow <= 64) 1333 SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); 1334 else 1335 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1336 CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), 1337 0); 1338 1339 return true; 1340 } 1341 1342 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, 1343 SDValue &SOffset, 1344 SDValue &ImmOffset) const { 1345 SDLoc DL(Offset); 1346 1347 if (!isa<ConstantSDNode>(Offset)) 1348 return false; 1349 1350 return SelectMUBUFConstant(Offset, SOffset, ImmOffset); 1351 } 1352 1353 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, 1354 SDValue &SOffset, 1355 SDValue &ImmOffset, 1356 SDValue &VOffset) const { 1357 SDLoc DL(Offset); 1358 1359 // Don't generate an unnecessary voffset for constant offsets. 
1360 if (isa<ConstantSDNode>(Offset)) { 1361 SDValue Tmp1, Tmp2; 1362 1363 // When necessary, use a voffset in <= CI anyway to work around a hardware 1364 // bug. 1365 if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS || 1366 SelectMUBUFConstant(Offset, Tmp1, Tmp2)) 1367 return false; 1368 } 1369 1370 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1371 SDValue N0 = Offset.getOperand(0); 1372 SDValue N1 = Offset.getOperand(1); 1373 if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 && 1374 SelectMUBUFConstant(N1, SOffset, ImmOffset)) { 1375 VOffset = N0; 1376 return true; 1377 } 1378 } 1379 1380 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1381 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1382 VOffset = Offset; 1383 1384 return true; 1385 } 1386 1387 template <bool IsSigned> 1388 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, 1389 SDValue &VAddr, 1390 SDValue &Offset, 1391 SDValue &SLC) const { 1392 int64_t OffsetVal = 0; 1393 1394 if (Subtarget->hasFlatInstOffsets() && 1395 CurDAG->isBaseWithConstantOffset(Addr)) { 1396 SDValue N0 = Addr.getOperand(0); 1397 SDValue N1 = Addr.getOperand(1); 1398 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); 1399 1400 if ((IsSigned && isInt<13>(COffsetVal)) || 1401 (!IsSigned && isUInt<12>(COffsetVal))) { 1402 Addr = N0; 1403 OffsetVal = COffsetVal; 1404 } 1405 } 1406 1407 VAddr = Addr; 1408 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); 1409 SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1410 1411 return true; 1412 } 1413 1414 bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, 1415 SDValue &VAddr, 1416 SDValue &Offset, 1417 SDValue &SLC) const { 1418 return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC); 1419 } 1420 1421 bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr, 1422 SDValue &VAddr, 1423 SDValue &Offset, 1424 SDValue &SLC) const { 1425 return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC); 1426 } 1427 1428 bool 
AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1429 SDValue &Offset, bool &Imm) const { 1430 1431 // FIXME: Handle non-constant offsets. 1432 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1433 if (!C) 1434 return false; 1435 1436 SDLoc SL(ByteOffsetNode); 1437 GCNSubtarget::Generation Gen = Subtarget->getGeneration(); 1438 int64_t ByteOffset = C->getSExtValue(); 1439 int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); 1440 1441 if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { 1442 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1443 Imm = true; 1444 return true; 1445 } 1446 1447 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1448 return false; 1449 1450 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1451 // 32-bit Immediates are supported on Sea Islands. 1452 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1453 } else { 1454 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1455 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1456 C32Bit), 0); 1457 } 1458 Imm = false; 1459 return true; 1460 } 1461 1462 SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { 1463 if (Addr.getValueType() != MVT::i32) 1464 return Addr; 1465 1466 // Zero-extend a 32-bit address. 
1467 SDLoc SL(Addr); 1468 1469 const MachineFunction &MF = CurDAG->getMachineFunction(); 1470 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1471 unsigned AddrHiVal = Info->get32BitAddressHighBits(); 1472 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); 1473 1474 const SDValue Ops[] = { 1475 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), 1476 Addr, 1477 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), 1478 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), 1479 0), 1480 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), 1481 }; 1482 1483 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, 1484 Ops), 0); 1485 } 1486 1487 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1488 SDValue &Offset, bool &Imm) const { 1489 SDLoc SL(Addr); 1490 1491 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1492 SDValue N0 = Addr.getOperand(0); 1493 SDValue N1 = Addr.getOperand(1); 1494 1495 if (SelectSMRDOffset(N1, Offset, Imm)) { 1496 SBase = Expand32BitAddress(N0); 1497 return true; 1498 } 1499 } 1500 SBase = Expand32BitAddress(Addr); 1501 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1502 Imm = true; 1503 return true; 1504 } 1505 1506 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1507 SDValue &Offset) const { 1508 bool Imm; 1509 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1510 } 1511 1512 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1513 SDValue &Offset) const { 1514 1515 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1516 return false; 1517 1518 bool Imm; 1519 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1520 return false; 1521 1522 return !Imm && isa<ConstantSDNode>(Offset); 1523 } 1524 1525 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1526 SDValue &Offset) const { 1527 bool Imm; 1528 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 
1529 !isa<ConstantSDNode>(Offset); 1530 } 1531 1532 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1533 SDValue &Offset) const { 1534 bool Imm; 1535 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1536 } 1537 1538 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1539 SDValue &Offset) const { 1540 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1541 return false; 1542 1543 bool Imm; 1544 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1545 return false; 1546 1547 return !Imm && isa<ConstantSDNode>(Offset); 1548 } 1549 1550 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, 1551 SDValue &Base, 1552 SDValue &Offset) const { 1553 SDLoc DL(Index); 1554 1555 if (CurDAG->isBaseWithConstantOffset(Index)) { 1556 SDValue N0 = Index.getOperand(0); 1557 SDValue N1 = Index.getOperand(1); 1558 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1559 1560 // (add n0, c0) 1561 Base = N0; 1562 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); 1563 return true; 1564 } 1565 1566 if (isa<ConstantSDNode>(Index)) 1567 return false; 1568 1569 Base = Index; 1570 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1571 return true; 1572 } 1573 1574 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1575 SDValue Val, uint32_t Offset, 1576 uint32_t Width) { 1577 // Transformation function, pack the offset and width of a BFE into 1578 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1579 // source, bits [5:0] contain the offset and bits [22:16] the width. 
1580 uint32_t PackedVal = Offset | (Width << 16); 1581 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1582 1583 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1584 } 1585 1586 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1587 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1588 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1589 // Predicate: 0 < b <= c < 32 1590 1591 const SDValue &Shl = N->getOperand(0); 1592 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1593 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1594 1595 if (B && C) { 1596 uint32_t BVal = B->getZExtValue(); 1597 uint32_t CVal = C->getZExtValue(); 1598 1599 if (0 < BVal && BVal <= CVal && CVal < 32) { 1600 bool Signed = N->getOpcode() == ISD::SRA; 1601 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1602 1603 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, 1604 32 - CVal)); 1605 return; 1606 } 1607 } 1608 SelectCode(N); 1609 } 1610 1611 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1612 switch (N->getOpcode()) { 1613 case ISD::AND: 1614 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1615 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1616 // Predicate: isMask(mask) 1617 const SDValue &Srl = N->getOperand(0); 1618 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1619 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1620 1621 if (Shift && Mask) { 1622 uint32_t ShiftVal = Shift->getZExtValue(); 1623 uint32_t MaskVal = Mask->getZExtValue(); 1624 1625 if (isMask_32(MaskVal)) { 1626 uint32_t WidthVal = countPopulation(MaskVal); 1627 1628 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1629 Srl.getOperand(0), ShiftVal, WidthVal)); 1630 return; 1631 } 1632 } 1633 } 1634 break; 1635 case ISD::SRL: 1636 if (N->getOperand(0).getOpcode() == ISD::AND) { 1637 // "(a & mask) srl b)" ---> "BFE_U32 a, b, 
popcount(mask >> b)" 1638 // Predicate: isMask(mask >> b) 1639 const SDValue &And = N->getOperand(0); 1640 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1641 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1642 1643 if (Shift && Mask) { 1644 uint32_t ShiftVal = Shift->getZExtValue(); 1645 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1646 1647 if (isMask_32(MaskVal)) { 1648 uint32_t WidthVal = countPopulation(MaskVal); 1649 1650 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1651 And.getOperand(0), ShiftVal, WidthVal)); 1652 return; 1653 } 1654 } 1655 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { 1656 SelectS_BFEFromShifts(N); 1657 return; 1658 } 1659 break; 1660 case ISD::SRA: 1661 if (N->getOperand(0).getOpcode() == ISD::SHL) { 1662 SelectS_BFEFromShifts(N); 1663 return; 1664 } 1665 break; 1666 1667 case ISD::SIGN_EXTEND_INREG: { 1668 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 1669 SDValue Src = N->getOperand(0); 1670 if (Src.getOpcode() != ISD::SRL) 1671 break; 1672 1673 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); 1674 if (!Amt) 1675 break; 1676 1677 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1678 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), 1679 Amt->getZExtValue(), Width)); 1680 return; 1681 } 1682 } 1683 1684 SelectCode(N); 1685 } 1686 1687 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { 1688 assert(N->getOpcode() == ISD::BRCOND); 1689 if (!N->hasOneUse()) 1690 return false; 1691 1692 SDValue Cond = N->getOperand(1); 1693 if (Cond.getOpcode() == ISD::CopyToReg) 1694 Cond = Cond.getOperand(2); 1695 1696 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 1697 return false; 1698 1699 MVT VT = Cond.getOperand(0).getSimpleValueType(); 1700 if (VT == MVT::i32) 1701 return true; 1702 1703 if (VT == MVT::i64) { 1704 auto ST = static_cast<const GCNSubtarget *>(Subtarget); 1705 1706 
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 1707 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); 1708 } 1709 1710 return false; 1711 } 1712 1713 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { 1714 SDValue Cond = N->getOperand(1); 1715 1716 if (Cond.isUndef()) { 1717 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, 1718 N->getOperand(2), N->getOperand(0)); 1719 return; 1720 } 1721 1722 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N); 1723 unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ; 1724 unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC; 1725 SDLoc SL(N); 1726 1727 if (!UseSCCBr) { 1728 // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not 1729 // analyzed what generates the vcc value, so we do not know whether vcc 1730 // bits for disabled lanes are 0. Thus we need to mask out bits for 1731 // disabled lanes. 1732 // 1733 // For the case that we select S_CBRANCH_SCC1 and it gets 1734 // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls 1735 // SIInstrInfo::moveToVALU which inserts the S_AND). 1736 // 1737 // We could add an analysis of what generates the vcc value here and omit 1738 // the S_AND when is unnecessary. But it would be better to add a separate 1739 // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it 1740 // catches both cases. 
1741 Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, 1742 CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), 1743 Cond), 1744 0); 1745 } 1746 1747 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); 1748 CurDAG->SelectNodeTo(N, BrOp, MVT::Other, 1749 N->getOperand(2), // Basic Block 1750 VCC.getValue(0)); 1751 } 1752 1753 void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) { 1754 MVT VT = N->getSimpleValueType(0); 1755 bool IsFMA = N->getOpcode() == ISD::FMA; 1756 if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() && 1757 !Subtarget->hasFmaMixInsts()) || 1758 ((IsFMA && Subtarget->hasMadMixInsts()) || 1759 (!IsFMA && Subtarget->hasFmaMixInsts()))) { 1760 SelectCode(N); 1761 return; 1762 } 1763 1764 SDValue Src0 = N->getOperand(0); 1765 SDValue Src1 = N->getOperand(1); 1766 SDValue Src2 = N->getOperand(2); 1767 unsigned Src0Mods, Src1Mods, Src2Mods; 1768 1769 // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand 1770 // using the conversion from f16. 1771 bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods); 1772 bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); 1773 bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); 1774 1775 assert((IsFMA || !Subtarget->hasFP32Denormals()) && 1776 "fmad selected with denormals enabled"); 1777 // TODO: We can select this with f32 denormals enabled if all the sources are 1778 // converted from f16 (in which case fmad isn't legal). 1779 1780 if (Sel0 || Sel1 || Sel2) { 1781 // For dummy operands. 1782 SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); 1783 SDValue Ops[] = { 1784 CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0, 1785 CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1, 1786 CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2, 1787 CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), 1788 Zero, Zero 1789 }; 1790 1791 CurDAG->SelectNodeTo(N, 1792 IsFMA ? 
AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32, 1793 MVT::f32, Ops); 1794 } else { 1795 SelectCode(N); 1796 } 1797 } 1798 1799 // This is here because there isn't a way to use the generated sub0_sub1 as the 1800 // subreg index to EXTRACT_SUBREG in tablegen. 1801 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { 1802 MemSDNode *Mem = cast<MemSDNode>(N); 1803 unsigned AS = Mem->getAddressSpace(); 1804 if (AS == AMDGPUASI.FLAT_ADDRESS) { 1805 SelectCode(N); 1806 return; 1807 } 1808 1809 MVT VT = N->getSimpleValueType(0); 1810 bool Is32 = (VT == MVT::i32); 1811 SDLoc SL(N); 1812 1813 MachineSDNode *CmpSwap = nullptr; 1814 if (Subtarget->hasAddr64()) { 1815 SDValue SRsrc, VAddr, SOffset, Offset, SLC; 1816 1817 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { 1818 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN : 1819 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN; 1820 SDValue CmpVal = Mem->getOperand(2); 1821 1822 // XXX - Do we care about glue operands? 1823 1824 SDValue Ops[] = { 1825 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1826 }; 1827 1828 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1829 } 1830 } 1831 1832 if (!CmpSwap) { 1833 SDValue SRsrc, SOffset, Offset, SLC; 1834 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { 1835 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN : 1836 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN; 1837 1838 SDValue CmpVal = Mem->getOperand(2); 1839 SDValue Ops[] = { 1840 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1841 }; 1842 1843 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1844 } 1845 } 1846 1847 if (!CmpSwap) { 1848 SelectCode(N); 1849 return; 1850 } 1851 1852 MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1); 1853 *MMOs = Mem->getMemOperand(); 1854 CmpSwap->setMemRefs(MMOs, MMOs + 1); 1855 1856 unsigned SubReg = Is32 ? 
AMDGPU::sub0 : AMDGPU::sub0_sub1; 1857 SDValue Extract 1858 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); 1859 1860 ReplaceUses(SDValue(N, 0), Extract); 1861 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); 1862 CurDAG->RemoveDeadNode(N); 1863 } 1864 1865 bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, 1866 unsigned &Mods) const { 1867 Mods = 0; 1868 Src = In; 1869 1870 if (Src.getOpcode() == ISD::FNEG) { 1871 Mods |= SISrcMods::NEG; 1872 Src = Src.getOperand(0); 1873 } 1874 1875 if (Src.getOpcode() == ISD::FABS) { 1876 Mods |= SISrcMods::ABS; 1877 Src = Src.getOperand(0); 1878 } 1879 1880 return true; 1881 } 1882 1883 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1884 SDValue &SrcMods) const { 1885 unsigned Mods; 1886 if (SelectVOP3ModsImpl(In, Src, Mods)) { 1887 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1888 return true; 1889 } 1890 1891 return false; 1892 } 1893 1894 bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, 1895 SDValue &SrcMods) const { 1896 SelectVOP3Mods(In, Src, SrcMods); 1897 return isNoNanSrc(Src); 1898 } 1899 1900 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { 1901 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) 1902 return false; 1903 1904 Src = In; 1905 return true; 1906 } 1907 1908 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1909 SDValue &SrcMods, SDValue &Clamp, 1910 SDValue &Omod) const { 1911 SDLoc DL(In); 1912 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); 1913 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); 1914 1915 return SelectVOP3Mods(In, Src, SrcMods); 1916 } 1917 1918 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1919 SDValue &SrcMods, 1920 SDValue &Clamp, 1921 SDValue &Omod) const { 1922 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1923 return SelectVOP3Mods(In, Src, SrcMods); 1924 } 1925 1926 bool 
AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, 1927 SDValue &Clamp, SDValue &Omod) const { 1928 Src = In; 1929 1930 SDLoc DL(In); 1931 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); 1932 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); 1933 1934 return true; 1935 } 1936 1937 static SDValue stripBitcast(SDValue Val) { 1938 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; 1939 } 1940 1941 // Figure out if this is really an extract of the high 16-bits of a dword. 1942 static bool isExtractHiElt(SDValue In, SDValue &Out) { 1943 In = stripBitcast(In); 1944 if (In.getOpcode() != ISD::TRUNCATE) 1945 return false; 1946 1947 SDValue Srl = In.getOperand(0); 1948 if (Srl.getOpcode() == ISD::SRL) { 1949 if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { 1950 if (ShiftAmt->getZExtValue() == 16) { 1951 Out = stripBitcast(Srl.getOperand(0)); 1952 return true; 1953 } 1954 } 1955 } 1956 1957 return false; 1958 } 1959 1960 // Look through operations that obscure just looking at the low 16-bits of the 1961 // same register. 
1962 static SDValue stripExtractLoElt(SDValue In) { 1963 if (In.getOpcode() == ISD::TRUNCATE) { 1964 SDValue Src = In.getOperand(0); 1965 if (Src.getValueType().getSizeInBits() == 32) 1966 return stripBitcast(Src); 1967 } 1968 1969 return In; 1970 } 1971 1972 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, 1973 SDValue &SrcMods) const { 1974 unsigned Mods = 0; 1975 Src = In; 1976 1977 if (Src.getOpcode() == ISD::FNEG) { 1978 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); 1979 Src = Src.getOperand(0); 1980 } 1981 1982 if (Src.getOpcode() == ISD::BUILD_VECTOR) { 1983 unsigned VecMods = Mods; 1984 1985 SDValue Lo = stripBitcast(Src.getOperand(0)); 1986 SDValue Hi = stripBitcast(Src.getOperand(1)); 1987 1988 if (Lo.getOpcode() == ISD::FNEG) { 1989 Lo = stripBitcast(Lo.getOperand(0)); 1990 Mods ^= SISrcMods::NEG; 1991 } 1992 1993 if (Hi.getOpcode() == ISD::FNEG) { 1994 Hi = stripBitcast(Hi.getOperand(0)); 1995 Mods ^= SISrcMods::NEG_HI; 1996 } 1997 1998 if (isExtractHiElt(Lo, Lo)) 1999 Mods |= SISrcMods::OP_SEL_0; 2000 2001 if (isExtractHiElt(Hi, Hi)) 2002 Mods |= SISrcMods::OP_SEL_1; 2003 2004 Lo = stripExtractLoElt(Lo); 2005 Hi = stripExtractLoElt(Hi); 2006 2007 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { 2008 // Really a scalar input. Just select from the low half of the register to 2009 // avoid packing. 2010 2011 Src = Lo; 2012 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 2013 return true; 2014 } 2015 2016 Mods = VecMods; 2017 } 2018 2019 // Packed instructions do not have abs modifiers. 
2020 Mods |= SISrcMods::OP_SEL_1; 2021 2022 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 2023 return true; 2024 } 2025 2026 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, 2027 SDValue &SrcMods, 2028 SDValue &Clamp) const { 2029 SDLoc SL(In); 2030 2031 // FIXME: Handle clamp and op_sel 2032 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 2033 2034 return SelectVOP3PMods(In, Src, SrcMods); 2035 } 2036 2037 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, 2038 SDValue &SrcMods) const { 2039 Src = In; 2040 // FIXME: Handle op_sel 2041 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 2042 return true; 2043 } 2044 2045 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src, 2046 SDValue &SrcMods, 2047 SDValue &Clamp) const { 2048 SDLoc SL(In); 2049 2050 // FIXME: Handle clamp 2051 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 2052 2053 return SelectVOP3OpSel(In, Src, SrcMods); 2054 } 2055 2056 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, 2057 SDValue &SrcMods) const { 2058 // FIXME: Handle op_sel 2059 return SelectVOP3Mods(In, Src, SrcMods); 2060 } 2061 2062 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, 2063 SDValue &SrcMods, 2064 SDValue &Clamp) const { 2065 SDLoc SL(In); 2066 2067 // FIXME: Handle clamp 2068 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 2069 2070 return SelectVOP3OpSelMods(In, Src, SrcMods); 2071 } 2072 2073 // The return value is not whether the match is possible (which it always is), 2074 // but whether or not it a conversion is really used. 
2075 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, 2076 unsigned &Mods) const { 2077 Mods = 0; 2078 SelectVOP3ModsImpl(In, Src, Mods); 2079 2080 if (Src.getOpcode() == ISD::FP_EXTEND) { 2081 Src = Src.getOperand(0); 2082 assert(Src.getValueType() == MVT::f16); 2083 Src = stripBitcast(Src); 2084 2085 // Be careful about folding modifiers if we already have an abs. fneg is 2086 // applied last, so we don't want to apply an earlier fneg. 2087 if ((Mods & SISrcMods::ABS) == 0) { 2088 unsigned ModsTmp; 2089 SelectVOP3ModsImpl(Src, Src, ModsTmp); 2090 2091 if ((ModsTmp & SISrcMods::NEG) != 0) 2092 Mods ^= SISrcMods::NEG; 2093 2094 if ((ModsTmp & SISrcMods::ABS) != 0) 2095 Mods |= SISrcMods::ABS; 2096 } 2097 2098 // op_sel/op_sel_hi decide the source type and source. 2099 // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. 2100 // If the sources's op_sel is set, it picks the high half of the source 2101 // register. 2102 2103 Mods |= SISrcMods::OP_SEL_1; 2104 if (isExtractHiElt(Src, Src)) { 2105 Mods |= SISrcMods::OP_SEL_0; 2106 2107 // TODO: Should we try to look for neg/abs here? 2108 } 2109 2110 return true; 2111 } 2112 2113 return false; 2114 } 2115 2116 bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, 2117 SDValue &SrcMods) const { 2118 unsigned Mods = 0; 2119 SelectVOP3PMadMixModsImpl(In, Src, Mods); 2120 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 2121 return true; 2122 } 2123 2124 // TODO: Can we identify things like v_mad_mixhi_f16? 
2125 bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const { 2126 if (In.isUndef()) { 2127 Src = In; 2128 return true; 2129 } 2130 2131 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) { 2132 SDLoc SL(In); 2133 SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32); 2134 MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 2135 SL, MVT::i32, K); 2136 Src = SDValue(MovK, 0); 2137 return true; 2138 } 2139 2140 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) { 2141 SDLoc SL(In); 2142 SDValue K = CurDAG->getTargetConstant( 2143 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); 2144 MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 2145 SL, MVT::i32, K); 2146 Src = SDValue(MovK, 0); 2147 return true; 2148 } 2149 2150 return isExtractHiElt(In, Src); 2151 } 2152 2153 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 2154 const AMDGPUTargetLowering& Lowering = 2155 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 2156 bool IsModified = false; 2157 do { 2158 IsModified = false; 2159 2160 // Go over all selected nodes and try to fold them a bit more 2161 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); 2162 while (Position != CurDAG->allnodes_end()) { 2163 SDNode *Node = &*Position++; 2164 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node); 2165 if (!MachineNode) 2166 continue; 2167 2168 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 2169 if (ResNode != Node) { 2170 if (ResNode) 2171 ReplaceUses(Node, ResNode); 2172 IsModified = true; 2173 } 2174 } 2175 CurDAG->RemoveDeadNodes(); 2176 } while (IsModified); 2177 } 2178 2179 bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 2180 Subtarget = &MF.getSubtarget<R600Subtarget>(); 2181 return SelectionDAGISel::runOnMachineFunction(MF); 2182 } 2183 2184 bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 2185 if (!N->readMem()) 
2186 return false; 2187 if (CbId == -1) 2188 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || 2189 N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; 2190 2191 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 2192 } 2193 2194 bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 2195 SDValue& IntPtr) { 2196 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 2197 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 2198 true); 2199 return true; 2200 } 2201 return false; 2202 } 2203 2204 bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 2205 SDValue& BaseReg, SDValue &Offset) { 2206 if (!isa<ConstantSDNode>(Addr)) { 2207 BaseReg = Addr; 2208 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 2209 return true; 2210 } 2211 return false; 2212 } 2213 2214 void R600DAGToDAGISel::Select(SDNode *N) { 2215 unsigned int Opc = N->getOpcode(); 2216 if (N->isMachineOpcode()) { 2217 N->setNodeId(-1); 2218 return; // Already selected. 2219 } 2220 2221 switch (Opc) { 2222 default: break; 2223 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 2224 case ISD::SCALAR_TO_VECTOR: 2225 case ISD::BUILD_VECTOR: { 2226 EVT VT = N->getValueType(0); 2227 unsigned NumVectorElts = VT.getVectorNumElements(); 2228 unsigned RegClassID; 2229 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 2230 // that adds a 128 bits reg copy when going through TwoAddressInstructions 2231 // pass. We want to avoid 128 bits copies as much as possible because they 2232 // can't be bundled by our scheduler. 
2233 switch(NumVectorElts) { 2234 case 2: RegClassID = R600::R600_Reg64RegClassID; break; 2235 case 4: 2236 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 2237 RegClassID = R600::R600_Reg128VerticalRegClassID; 2238 else 2239 RegClassID = R600::R600_Reg128RegClassID; 2240 break; 2241 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 2242 } 2243 SelectBuildVector(N, RegClassID); 2244 return; 2245 } 2246 } 2247 2248 SelectCode(N); 2249 } 2250 2251 bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 2252 SDValue &Offset) { 2253 ConstantSDNode *C; 2254 SDLoc DL(Addr); 2255 2256 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 2257 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); 2258 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2259 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && 2260 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { 2261 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); 2262 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2263 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 2264 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 2265 Base = Addr.getOperand(0); 2266 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 2267 } else { 2268 Base = Addr; 2269 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 2270 } 2271 2272 return true; 2273 } 2274 2275 bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 2276 SDValue &Offset) { 2277 ConstantSDNode *IMMOffset; 2278 2279 if (Addr.getOpcode() == ISD::ADD 2280 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 2281 && isInt<16>(IMMOffset->getZExtValue())) { 2282 2283 Base = Addr.getOperand(0); 2284 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 2285 MVT::i32); 2286 return true; 2287 // If the pointer address is constant, we can move it to the offset field. 
2288 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 2289 && isInt<16>(IMMOffset->getZExtValue())) { 2290 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 2291 SDLoc(CurDAG->getEntryNode()), 2292 R600::ZERO, MVT::i32); 2293 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 2294 MVT::i32); 2295 return true; 2296 } 2297 2298 // Default case, no offset 2299 Base = Addr; 2300 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 2301 return true; 2302 } 2303