//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
      : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}
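
// Returns true if \p N is a constant the hardware can encode directly in the
// instruction word. As a rough guide (SIInstrInfo::isInlineConstant is the
// authoritative check), inline immediates are the small integers and a
// handful of floating-point values such as 0.0, +/-0.5, +/-1.0, +/-2.0 and
// +/-4.0, which never need a separate materializing move.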
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }
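
  // As a sketch of the node built below, a v4i32 build_vector becomes
  // (subregister indices come from getSubRegFromChannel):
  //   REG_SEQUENCE SReg_128, %e0, sub0, %e1, sub1, %e2, sub2, %e3, sub3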
  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq) {
    SelectCode(N);
    return;
  }
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;
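
    // A sketch of what the code below produces for a 64-bit constant that is
    // not an inline immediate, e.g. i64 0x100000000:
    //   %lo = S_MOV_B32 0
    //   %hi = S_MOV_B32 1
    //   REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1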
    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has separate operands for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
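
  // A minimal sketch of the expansion for a 64-bit add (the subtract case is
  // analogous with S_SUB_U32/S_SUBB_U32):
  //   %lo  = S_ADD_U32  %lhs.sub0, %rhs.sub0   ; also produces the carry
  //   %hi  = S_ADDC_U32 %lhs.sub1, %rhs.sub1   ; consumes the carry via glue
  //   %res = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1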

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading. v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to
  // _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
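
// A quick sketch of the legality check below: the offset must fit the
// encoding width (16 bits for the single-offset DS forms, 8 bits for each
// read2/write2 offset), and prior to CI the base must also be provably
// non-negative, since a negative base plus an offset does not behave as
// expected there.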
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select the VOP3 version for the with-carry case.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI; we still need to check whether the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
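
  // The address is matched into the MUBUF fields roughly as follows (a
  // sketch of the cases handled below):
  //   (add (add Ptr, VAddr), C) -> addr64 form, immediate offset C
  //   (add Ptr, C)              -> offset form, immediate offset C
  //   (add Ptr, VAddr)          -> addr64 form, offset 0
  //   Ptr                       -> offset form, offset 0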

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                               CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed on Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
        Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;
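
  // The split below keeps the immediate within the 12-bit MUBUF offset field
  // and moves the rest into SOffset. As a worked example, Imm = 5000 (with
  // Align = 4, so MaxImm = 4092) takes the else branch and yields
  // ImmOffset = 908 and Overflow = 4092; 4092 + 908 == 5000, and both
  // components stay 4-byte aligned.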

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                             CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}
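
// A sketch of what Expand32BitAddress builds for a 32-bit SMRD base: the low
// half is the original pointer and the high half is the function's
// 32-bit-address high bits, combined as
//   REG_SEQUENCE SReg_64_XEXEC, addr, sub0, (S_MOV_B32 hi_bits), sub1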
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
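  // For example, offset = 16 and width = 8 pack to 16 | (8 << 16), i.e.
  // 0x00080010.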
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32
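  //
  // For instance, "(x << 8) srl 16" extracts bits [23:8] of x and becomes
  // "BFE_U32 x, 8, 16" (offset c - b = 8, width 32 - c = 16).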
  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
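// A BRCOND can use a branch on SCC only when its condition is a single-use
// SETCC that the scalar unit can evaluate: any 32-bit compare, or a 64-bit
// EQ/NE compare on subtargets with scalar 64-bit compare-equal support.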
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets changed to
    // S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU, which inserts the S_AND.
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies that removes the unnecessary S_AND,
    // so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                       CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                       Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an
  // operand using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                          SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
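// Fold an outer fneg and/or fabs on the source into the NEG/ABS bits of the
// VOP3 source-modifier operand; e.g. (fneg (fabs x)) selects x with both
// NEG and ABS set.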
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
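// For example, (i16 (trunc (srl i32:$x, 16))) is such an extract; Out is
// then set to $x with any intervening bitcasts stripped.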
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register
      // to avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
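// For example, (f32 (fp_extend f16:$x)) is a real use of the f16-to-f32
// conversion: op_sel_hi is set on the source, and if $x is itself the high
// half of a 32-bit register, op_sel is set as well.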
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL,
                                          MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
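// R600 instruction selection. Most nodes are handled by the generated
// matcher; vector-build nodes are selected manually so that the destination
// register class can be picked up front (see SelectBuildVector).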
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128-bit reg copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    switch (NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}