1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPUPerfHintAnalysis.h" 20 #include "AMDGPURegisterInfo.h" 21 #include "AMDGPUSubtarget.h" 22 #include "AMDGPUTargetMachine.h" 23 #include "SIDefines.h" 24 #include "SIISelLowering.h" 25 #include "SIInstrInfo.h" 26 #include "SIMachineFunctionInfo.h" 27 #include "SIRegisterInfo.h" 28 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 29 #include "llvm/ADT/APInt.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/StringRef.h" 32 #include "llvm/Analysis/DivergenceAnalysis.h" 33 #include "llvm/Analysis/ValueTracking.h" 34 #include "llvm/CodeGen/FunctionLoweringInfo.h" 35 #include "llvm/CodeGen/ISDOpcodes.h" 36 #include "llvm/CodeGen/MachineFunction.h" 37 #include "llvm/CodeGen/MachineRegisterInfo.h" 38 #include "llvm/CodeGen/SelectionDAG.h" 39 #include "llvm/CodeGen/SelectionDAGISel.h" 40 #include "llvm/CodeGen/SelectionDAGNodes.h" 41 #include "llvm/CodeGen/ValueTypes.h" 42 #include "llvm/IR/BasicBlock.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/MC/MCInstrDesc.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/CodeGen.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include <cassert> 51 #include <cstdint> 52 #include <new> 53 #include <vector> 54 55 using namespace llvm; 56 57 namespace llvm { 
class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space mapping queried from the TargetMachine at
  // construction time.
  AMDGPUAS AMDGPUASI;
  // Snapshot of AMDGPUTargetMachine::EnableLateStructurizeCFG taken when the
  // pass object is created.
  bool EnableLateStructurizeCFG;

public:
  // NOTE(review): TM defaults to nullptr but is unconditionally dereferenced
  // in the initializer list and body -- callers must always pass a valid
  // TargetMachine; the default exists only to satisfy pass-registry
  // default-construction.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (LDS) addressing-mode selection.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer) addressing-mode selection.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing-mode selection.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory) addressing-mode selection.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 / VOP3P source-modifier (neg/abs, clamp, omod, op_sel) selection.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  // Custom selection for nodes tablegen patterns cannot express directly
  // (multiple results, glue, carry chains, ...).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

// R600 shares the generic AMDGPU selector but overrides top-level selection
// and the two indirect/VTX addressing-mode predicates.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// This pass converts a legalized DAG into an R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Re-fetch the subtarget per function: it can differ between functions
  // (e.g. via per-function target attributes).
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \returns true if \p N is known to never be a NaN source: either no-NaNs
/// FP math is enabled globally, the node carries the no-NaNs fast-math flag,
/// or the DAG can prove the value is never NaN.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \returns true if \p N is an integer or FP constant whose bit pattern can
/// be encoded as a hardware inline immediate operand; false for any other
/// kind of node.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
299 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 300 unsigned OpNo) const { 301 if (!N->isMachineOpcode()) { 302 if (N->getOpcode() == ISD::CopyToReg) { 303 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 304 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 305 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 306 return MRI.getRegClass(Reg); 307 } 308 309 const SIRegisterInfo *TRI 310 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 311 return TRI->getPhysRegClass(Reg); 312 } 313 314 return nullptr; 315 } 316 317 switch (N->getMachineOpcode()) { 318 default: { 319 const MCInstrDesc &Desc = 320 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 321 unsigned OpIdx = Desc.getNumDefs() + OpNo; 322 if (OpIdx >= Desc.getNumOperands()) 323 return nullptr; 324 int RegClass = Desc.OpInfo[OpIdx].RegClass; 325 if (RegClass == -1) 326 return nullptr; 327 328 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 329 } 330 case AMDGPU::REG_SEQUENCE: { 331 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 332 const TargetRegisterClass *SuperRC = 333 Subtarget->getRegisterInfo()->getRegClass(RCID); 334 335 SDValue SubRegOp = N->getOperand(OpNo + 1); 336 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 337 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 338 SubRegIdx); 339 } 340 } 341 } 342 343 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 344 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS || 345 !Subtarget->ldsRequiresM0Init()) 346 return N; 347 348 const SITargetLowering& Lowering = 349 *static_cast<const SITargetLowering*>(getTargetLowering()); 350 351 // Write max value to m0 before each load operation 352 353 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 354 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 355 356 SDValue Glue = M0.getValue(1); 357 358 
SmallVector <SDValue, 8> Ops; 359 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 360 Ops.push_back(N->getOperand(i)); 361 } 362 Ops.push_back(Glue); 363 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 364 } 365 366 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 367 switch (NumVectorElts) { 368 case 1: 369 return AMDGPU::SReg_32_XM0RegClassID; 370 case 2: 371 return AMDGPU::SReg_64RegClassID; 372 case 4: 373 return AMDGPU::SReg_128RegClassID; 374 case 8: 375 return AMDGPU::SReg_256RegClassID; 376 case 16: 377 return AMDGPU::SReg_512RegClassID; 378 } 379 380 llvm_unreachable("invalid vector size"); 381 } 382 383 static bool getConstantValue(SDValue N, uint32_t &Out) { 384 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 385 Out = C->getAPIntValue().getZExtValue(); 386 return true; 387 } 388 389 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 390 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 391 return true; 392 } 393 394 return false; 395 } 396 397 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 398 EVT VT = N->getValueType(0); 399 unsigned NumVectorElts = VT.getVectorNumElements(); 400 EVT EltVT = VT.getVectorElementType(); 401 SDLoc DL(N); 402 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 403 404 if (NumVectorElts == 1) { 405 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 406 RegClass); 407 return; 408 } 409 410 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 411 "supported yet"); 412 // 16 = Max Num Vector Elements 413 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 414 // 1 = Vector Register Class 415 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 416 417 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 418 bool IsRegSeq = true; 419 unsigned NOps = N->getNumOperands(); 420 for (unsigned i = 0; i < NOps; i++) 
{ 421 // XXX: Why is this here? 422 if (isa<RegisterSDNode>(N->getOperand(i))) { 423 IsRegSeq = false; 424 break; 425 } 426 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); 427 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 428 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); 429 } 430 if (NOps != NumVectorElts) { 431 // Fill in the missing undef elements if this was a scalar_to_vector. 432 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 433 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 434 DL, EltVT); 435 for (unsigned i = NOps; i < NumVectorElts; ++i) { 436 unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); 437 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 438 RegSeqArgs[1 + (2 * i) + 1] = 439 CurDAG->getTargetConstant(Sub, DL, MVT::i32); 440 } 441 } 442 443 if (!IsRegSeq) 444 SelectCode(N); 445 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 446 } 447 448 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 449 unsigned int Opc = N->getOpcode(); 450 if (N->isMachineOpcode()) { 451 N->setNodeId(-1); 452 return; // Already selected. 453 } 454 455 if (isa<AtomicSDNode>(N) || 456 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC || 457 Opc == AMDGPUISD::ATOMIC_LOAD_FADD || 458 Opc == AMDGPUISD::ATOMIC_LOAD_FMIN || 459 Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) 460 N = glueCopyToM0(N); 461 462 switch (Opc) { 463 default: 464 break; 465 // We are selecting i64 ADD here instead of custom lower it during 466 // DAG legalization, so we can fold some i64 ADDs used for address 467 // calculation into the LOAD and STORE instructions. 
468 case ISD::ADDC: 469 case ISD::ADDE: 470 case ISD::SUBC: 471 case ISD::SUBE: { 472 if (N->getValueType(0) != MVT::i64) 473 break; 474 475 SelectADD_SUB_I64(N); 476 return; 477 } 478 case ISD::UADDO: 479 case ISD::USUBO: { 480 SelectUADDO_USUBO(N); 481 return; 482 } 483 case AMDGPUISD::FMUL_W_CHAIN: { 484 SelectFMUL_W_CHAIN(N); 485 return; 486 } 487 case AMDGPUISD::FMA_W_CHAIN: { 488 SelectFMA_W_CHAIN(N); 489 return; 490 } 491 492 case ISD::SCALAR_TO_VECTOR: 493 case ISD::BUILD_VECTOR: { 494 EVT VT = N->getValueType(0); 495 unsigned NumVectorElts = VT.getVectorNumElements(); 496 if (VT.getScalarSizeInBits() == 16) { 497 if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) { 498 uint32_t LHSVal, RHSVal; 499 if (getConstantValue(N->getOperand(0), LHSVal) && 500 getConstantValue(N->getOperand(1), RHSVal)) { 501 uint32_t K = LHSVal | (RHSVal << 16); 502 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 503 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 504 return; 505 } 506 } 507 508 break; 509 } 510 511 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 512 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 513 SelectBuildVector(N, RegClassID); 514 return; 515 } 516 case ISD::BUILD_PAIR: { 517 SDValue RC, SubReg0, SubReg1; 518 SDLoc DL(N); 519 if (N->getValueType(0) == MVT::i128) { 520 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 521 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 522 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 523 } else if (N->getValueType(0) == MVT::i64) { 524 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 525 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 526 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 527 } else { 528 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 529 } 530 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 531 N->getOperand(1), SubReg1 }; 532 ReplaceNode(N, 
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 533 N->getValueType(0), Ops)); 534 return; 535 } 536 537 case ISD::Constant: 538 case ISD::ConstantFP: { 539 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 540 break; 541 542 uint64_t Imm; 543 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 544 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 545 else { 546 ConstantSDNode *C = cast<ConstantSDNode>(N); 547 Imm = C->getZExtValue(); 548 } 549 550 SDLoc DL(N); 551 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 552 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 553 MVT::i32)); 554 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 555 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 556 const SDValue Ops[] = { 557 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 558 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 559 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 560 }; 561 562 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 563 N->getValueType(0), Ops)); 564 return; 565 } 566 case ISD::LOAD: 567 case ISD::STORE: 568 case ISD::ATOMIC_LOAD: 569 case ISD::ATOMIC_STORE: { 570 N = glueCopyToM0(N); 571 break; 572 } 573 574 case AMDGPUISD::BFE_I32: 575 case AMDGPUISD::BFE_U32: { 576 // There is a scalar version available, but unlike the vector version which 577 // has a separate operand for the offset and width, the scalar version packs 578 // the width and offset into a single operand. Try to move to the scalar 579 // version if the offsets are constant, so that we can try to keep extended 580 // loads of kernel arguments in SGPRs. 581 582 // TODO: Technically we could try to pattern match scalar bitshifts of 583 // dynamic values, but it's probably not useful. 
584 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 585 if (!Offset) 586 break; 587 588 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 589 if (!Width) 590 break; 591 592 bool Signed = Opc == AMDGPUISD::BFE_I32; 593 594 uint32_t OffsetVal = Offset->getZExtValue(); 595 uint32_t WidthVal = Width->getZExtValue(); 596 597 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 598 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 599 return; 600 } 601 case AMDGPUISD::DIV_SCALE: { 602 SelectDIV_SCALE(N); 603 return; 604 } 605 case AMDGPUISD::MAD_I64_I32: 606 case AMDGPUISD::MAD_U64_U32: { 607 SelectMAD_64_32(N); 608 return; 609 } 610 case ISD::CopyToReg: { 611 const SITargetLowering& Lowering = 612 *static_cast<const SITargetLowering*>(getTargetLowering()); 613 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 614 break; 615 } 616 case ISD::AND: 617 case ISD::SRL: 618 case ISD::SRA: 619 case ISD::SIGN_EXTEND_INREG: 620 if (N->getValueType(0) != MVT::i32) 621 break; 622 623 SelectS_BFE(N); 624 return; 625 case ISD::BRCOND: 626 SelectBRCOND(N); 627 return; 628 case ISD::FMAD: 629 case ISD::FMA: 630 SelectFMAD_FMA(N); 631 return; 632 case AMDGPUISD::ATOMIC_CMP_SWAP: 633 SelectATOMIC_CMP_SWAP(N); 634 return; 635 } 636 637 SelectCode(N); 638 } 639 640 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 641 if (!N->readMem()) 642 return false; 643 if (CbId == -1) 644 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS || 645 N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT; 646 647 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 648 } 649 650 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 651 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 652 const Instruction *Term = BB->getTerminator(); 653 return Term->getMetadata("amdgpu.uniform") || 654 Term->getMetadata("structurizecfg.uniform"); 655 } 656 657 StringRef 
AMDGPUDAGToDAGISel::getPassName() const { 658 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 659 } 660 661 //===----------------------------------------------------------------------===// 662 // Complex Patterns 663 //===----------------------------------------------------------------------===// 664 665 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 666 SDValue& IntPtr) { 667 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 668 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 669 true); 670 return true; 671 } 672 return false; 673 } 674 675 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 676 SDValue& BaseReg, SDValue &Offset) { 677 if (!isa<ConstantSDNode>(Addr)) { 678 BaseReg = Addr; 679 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 680 return true; 681 } 682 return false; 683 } 684 685 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 686 SDValue &Offset) { 687 return false; 688 } 689 690 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 691 SDValue &Offset) { 692 ConstantSDNode *C; 693 SDLoc DL(Addr); 694 695 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 696 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 697 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 698 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && 699 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { 700 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 701 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 702 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 703 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 704 Base = Addr.getOperand(0); 705 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 706 } else { 707 Base = Addr; 708 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 709 } 710 711 return true; 712 } 713 714 // 
// FIXME: Should only handle addcarry/subcarry
/// Expand a 64-bit scalar add/sub (with or without carry) into a pair of
/// 32-bit S_ADD/S_SUB operations chained through SCC glue, recombined with
/// REG_SEQUENCE. The incoming carry (ADDE/SUBE) is operand 2; the outgoing
/// carry, if any, is the glue result of the high half.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low half: plain op, or the carry-consuming variant fed by operand 2.
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the low half's carry (glue result 1).
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

/// Select the chained FMA node into V_FMA_F32, filling the fixed VOP3 operand
/// slots (modifier immediates interleaved with sources, then clamp/omod,
/// then chain and glue).
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);  // chain
  Ops[9] = N->getOperand(4);  // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

/// Select the chained FMUL node into V_MUL_F32_e64; same slot layout as
/// above but with only two sources.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);  // chain
  Ops[7] = N->getOperand(3);  // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  // The machine instruction takes an explicit clamp operand; always 0 here.
  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

/// \returns true if \p Offset fits in \p OffsetBits bits (8 or 16) and the
/// base is safe to use with an unsigned DS offset on this subtarget.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// Match a DS address as (base, uimm16 offset). Always succeeds; the default
/// case is (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
              AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS read2/write2-style address: a base plus two 8-bit dword
// (byte/4) offsets, where the second offset is the first plus one dword.
// Mirrors the three cases of SelectDS1Addr1Offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords (units of 4 bytes).
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    // Checking the larger second offset covers both slots.
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: fold it entirely into the offsets over a zero base
    // (shared V_MOV of 0), as in the single-offset case.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// Decompose an address into the full set of MUBUF operands: pointer,
// vaddr, soffset, immediate offset, and the offen/idxen/addr64 mode bits.
// GLC/SLC are only defaulted when the caller did not pre-populate them.
// Returns false when the subtarget prefers flat instructions for globals.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  // Start from the "pure offset" addressing mode; the matches below
  // override these defaults as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Match the addr64 MUBUF form: only succeeds when SelectMUBUF chose the
// addr64 mode, wrapping the pointer into a 64-bit rsrc descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload: addr64 match with SLC forced to 0 and GLC/TFE
// left for SelectMUBUF to default.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// True if the access is against a stack pseudo-source value (i.e. part of
// a call sequence's argument area).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// For a private (scratch) access, pick the SGPR the address is relative
// to: the frame pointer for resolvable frame indexes, otherwise the
// entry's scratch wave offset register. Returns (vaddr, soffset-reg).
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Match a scratch access in the offen (VGPR offset) MUBUF form.
// Always succeeds; splits constant addresses into a moved high part
// (vaddr) plus a 12-bit immediate, and folds legal constant offsets
// off add chains when the base is known safe.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    // Bits above the 12-bit immediate range go into a VGPR via V_MOV.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                    DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Match a scratch access in the pure-offset MUBUF form: only when the
// whole address is a constant that fits the immediate offset field.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Match the offset-only MUBUF form (no vaddr, no idxen, no addr64),
// building a default rsrc descriptor around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  // Only legal when SelectMUBUF picked none of the register-offset modes.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload discarding GLC/SLC/TFE.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Overload exposing only SLC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a 12-bit immediate (kept 4-byte
// aligned) plus an SOffset carrying any overflow. Fails on SI/CI when an
// SOffset would be required, due to the clamping hardware bug below.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows fit an inline constant; otherwise materialize with
  // S_MOV_B32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Intrinsic soffset/immoffset matcher: constant offsets only.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Intrinsic matcher producing a voffset as well; used when the offset may
// not be (entirely) constant.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    // Only fold non-negative constants; the variable part becomes voffset.
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  // Fallback: whole offset in voffset, zero soffset/immoffset.
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Match a flat address, folding a constant offset into the instruction's
// offset field when the subtarget supports flat offsets (signed 13-bit
// for the signed variant, unsigned 12-bit otherwise). Always succeeds.
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

// Flat atomic addressing: unsigned offset variant.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

// Flat atomic addressing: signed offset variant.
bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

// Encode a constant SMRD byte offset. Imm is set to true when the result
// is an immediate operand, false when the offset had to be placed in a
// register (S_MOV_B32) or uses CI's 32-bit literal form.
bool
AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                     SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  // NOTE(review): isUInt<32>(EncodedOffset) is already guaranteed by the
  // early return above, so this re-check looks redundant — confirm before
  // simplifying.
  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Widen a 32-bit address to 64 bits for SMRD by pairing it with the
// function's high address bits in a REG_SEQUENCE.
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

// Match an SMRD address as (sbase, offset). Tries to peel a constant
// offset first; otherwise the whole address becomes the base with a zero
// immediate. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an immediate offset only.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with CI's 32-bit literal offset form (Sea Islands only).
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with the offset in an SGPR (neither immediate nor constant literal).
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Buffer-variant SMRD offset matchers, mirroring the three cases above
// but for a bare offset operand rather than a full address.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// Match a MOVREL index as (base, constant offset). Rejects a purely
// constant index (no base to index with); otherwise always succeeds.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

// Build an S_BFE_{I32,U32} machine node extracting Width bits of Val
// starting at bit Offset.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

// Fold a shift-left followed by an arithmetic/logical shift-right into a
// single bitfield extract when the constants satisfy the predicate below;
// otherwise fall back to generated matching.
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

// Recognize shift/mask/sext_inreg idioms that are bitfield extracts and
// select them as S_BFE; anything unmatched falls through to SelectCode.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Return true if this BRCOND's condition is a single-use SETCC that can
// be lowered through SCC (i32 compares always; i64 eq/ne only when the
// subtarget has 64-bit scalar compares).
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  // Look through a CopyToReg feeding the branch condition.
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Select a conditional branch: undef conditions become SI_BR_UNDEF;
// uniform SCC-able conditions use S_CBRANCH_SCC1, everything else goes
// through VCC with the lanes masked by EXEC.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0.  Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND).
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when is unnecessary. But it would be better to add a separate
    // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    // catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                       CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                       Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// Select FMAD/FMA into the mixed-precision V_MAD_MIX_F32/V_FMA_MIX_F32
// forms when any source is a converted f16 operand; otherwise defer to
// generated matching.
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  // Bail out unless exactly the matching mix instruction for this opcode
  // exists on the subtarget (mad<->mad_mix, fma<->fma_mix).
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
  // using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // Flat address space is handled by the generated patterns.
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // Prefer the addr64 MUBUF form when the subtarget still has it.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand from the original node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The buffer atomic returns the old value in the low half of the
  // destination tuple; extract just that part.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Peel FNEG/FABS off a VOP3 source, accumulating the corresponding
// NEG/ABS modifier bits. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

// VOP3 source with modifiers as an i32 target constant.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

// Same as SelectVOP3Mods, but only matches when the stripped source is
// known not to be a NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Match a source only when it carries no FNEG/FABS modifier.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// Source-0 matcher that also supplies default (zero) clamp and omod
// operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

// Variant with clamp/omod defaulted as a shared i32 zero constant.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Pass the source through unchanged, supplying zero clamp/omod only.
bool
AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                    SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

// Look through a single BITCAST wrapper.
static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
// A truncate from a 32-bit value reads only the low 16 bits, so we can look
// through it (and a bitcast of its source) to the underlying register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

// Select a packed (VOP3P) source and its per-half neg/op_sel modifier bits.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  // A whole-vector fneg negates both halves; XOR so a later per-element fneg
  // can cancel it back out.
  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the vector-level mods in case the per-element analysis below
    // doesn't pan out and we have to fall back to selecting the vector as-is.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // op_sel bits redirect an element read to the high half of the source
    // register, letting us fold away explicit high-half extracts.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.

  // Default op_sel_hi: read the high element from the high half of the source.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// VOP3P source selection plus a default (zero) clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

// Select a source for an instruction with op_sel operands; currently always
// emits zero modifiers.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

// SelectVOP3OpSel plus a default (zero) clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

// Select a source with neg/abs modifiers for an op_sel instruction; op_sel
// itself is not yet matched, so this defers to plain VOP3 mod selection.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

// SelectVOP3OpSelMods plus a default (zero) clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not it a conversion is really used.
// Match a mad_mix/fma_mix source: an optionally fp_extend-ed f16 value (with
// neg/abs folded into Mods), or a plain f32 source. Returns true iff an
// f16 -> f32 conversion was actually folded.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // First strip any f32-level fneg/fabs into Mods.
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // An inner fneg composes (XOR) with any outer fneg already recorded.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      // abs discards any sign, so it simply ORs in.
      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    // If the sources's op_sel is set, it picks the high half of the source
    // register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

// ComplexPattern wrapper: always matches, encoding whatever mods were found
// (possibly none) as an i32 target constant.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// TODO: Can we identify things like v_mad_mixhi_f16?
// Match a value destined for the high 16 bits of a 32-bit register: undef,
// a constant pre-shifted into the high half, or an explicit high-half extract.
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  // Materialize an integer constant shifted into the high 16 bits via v_mov.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  // Same for an FP constant, using its raw bit pattern.
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

// After instruction selection, repeatedly run target PostISelFolding over all
// machine nodes until no more folds apply (a fold may enable further folds).
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      // Advance before folding: PostISelFolding may mutate/replace Node.
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// R600 top-level node selection: handle vector-building nodes specially,
// defer everything else to the generated selector.
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

// Split an indirect address into a base register and constant offset.
// Always succeeds; the fallback is (Addr, 0).
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Pure constant address: offset from the indirect base register.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, const): fold the constant into the offset field.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Split a VTX_READ address into base + 16-bit immediate offset.
// Always succeeds; the fallback is (Addr, 0).
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    // Base becomes the zero register so the whole address is the immediate.
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}