1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUArgumentUsageInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUInstrInfo.h" 19 #include "AMDGPURegisterInfo.h" 20 #include "AMDGPUSubtarget.h" 21 #include "AMDGPUTargetMachine.h" 22 #include "SIDefines.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "SIMachineFunctionInfo.h" 26 #include "SIRegisterInfo.h" 27 #include "llvm/ADT/APInt.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/Analysis/ValueTracking.h" 31 #include "llvm/CodeGen/FunctionLoweringInfo.h" 32 #include "llvm/CodeGen/ISDOpcodes.h" 33 #include "llvm/CodeGen/MachineFunction.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineValueType.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/SelectionDAGISel.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/Instruction.h" 42 #include "llvm/MC/MCInstrDesc.h" 43 #include "llvm/Support/Casting.h" 44 #include "llvm/Support/CodeGen.h" 45 #include "llvm/Support/ErrorHandling.h" 46 #include "llvm/Support/MathExtras.h" 47 #include <cassert> 48 #include <cstdint> 49 #include <new> 50 #include <vector> 51 52 using namespace llvm; 53 54 namespace llvm { 55 56 class R600InstrInfo; 57 58 } // end namespace llvm 59 60 
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space mapping for the current target.
  AMDGPUAS AMDGPUASI;
  // Mirrors AMDGPUTargetMachine::EnableLateStructurizeCFG at construction.
  bool EnableLateStructurizeCFG;

public:
  // NOTE(review): the TM default of nullptr exists so the pass registry can
  // default-construct this pass, but *TM is dereferenced unconditionally
  // below — constructing with TM == nullptr would crash. Confirm callers.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;

  // R600-specific operand folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern predicates referenced from the TableGen patterns.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local data share) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // FLAT addressing.
  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 / VOP3P source-modifier matching.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  // Custom selection for nodes TableGen cannot match (multiple results,
  // glue/carry chains, packed constants, ...).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

/// R600 variant of the selector; overrides the address matchers that differ
/// from the GCN path.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

/// \brief This pass converts a legalized DAG into a R600-specific
/// DAG, ready for instruction scheduling.
254 FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, 255 CodeGenOpt::Level OptLevel) { 256 return new R600DAGToDAGISel(TM, OptLevel); 257 } 258 259 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 260 Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); 261 return SelectionDAGISel::runOnMachineFunction(MF); 262 } 263 264 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { 265 if (TM.Options.NoNaNsFPMath) 266 return true; 267 268 // TODO: Move into isKnownNeverNaN 269 if (N->getFlags().isDefined()) 270 return N->getFlags().hasNoNaNs(); 271 272 return CurDAG->isKnownNeverNaN(N); 273 } 274 275 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 276 const SIInstrInfo *TII 277 = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); 278 279 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 280 return TII->isInlineConstant(C->getAPIntValue()); 281 282 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 283 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 284 285 return false; 286 } 287 288 /// \brief Determine the register class for \p OpNo 289 /// \returns The register class of the virtual register that will be used for 290 /// the given operand number \OpNo or NULL if the register class cannot be 291 /// determined. 
292 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 293 unsigned OpNo) const { 294 if (!N->isMachineOpcode()) { 295 if (N->getOpcode() == ISD::CopyToReg) { 296 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 297 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 298 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 299 return MRI.getRegClass(Reg); 300 } 301 302 const SIRegisterInfo *TRI 303 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 304 return TRI->getPhysRegClass(Reg); 305 } 306 307 return nullptr; 308 } 309 310 switch (N->getMachineOpcode()) { 311 default: { 312 const MCInstrDesc &Desc = 313 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 314 unsigned OpIdx = Desc.getNumDefs() + OpNo; 315 if (OpIdx >= Desc.getNumOperands()) 316 return nullptr; 317 int RegClass = Desc.OpInfo[OpIdx].RegClass; 318 if (RegClass == -1) 319 return nullptr; 320 321 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 322 } 323 case AMDGPU::REG_SEQUENCE: { 324 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 325 const TargetRegisterClass *SuperRC = 326 Subtarget->getRegisterInfo()->getRegClass(RCID); 327 328 SDValue SubRegOp = N->getOperand(OpNo + 1); 329 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 330 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 331 SubRegIdx); 332 } 333 } 334 } 335 336 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 337 if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) 338 return N; 339 340 const SITargetLowering& Lowering = 341 *static_cast<const SITargetLowering*>(getTargetLowering()); 342 343 // Write max value to m0 before each load operation 344 345 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 346 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 347 348 SDValue Glue = M0.getValue(1); 349 350 SmallVector <SDValue, 8> Ops; 351 for 
(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 352 Ops.push_back(N->getOperand(i)); 353 } 354 Ops.push_back(Glue); 355 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 356 357 return N; 358 } 359 360 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 361 switch (NumVectorElts) { 362 case 1: 363 return AMDGPU::SReg_32_XM0RegClassID; 364 case 2: 365 return AMDGPU::SReg_64RegClassID; 366 case 4: 367 return AMDGPU::SReg_128RegClassID; 368 case 8: 369 return AMDGPU::SReg_256RegClassID; 370 case 16: 371 return AMDGPU::SReg_512RegClassID; 372 } 373 374 llvm_unreachable("invalid vector size"); 375 } 376 377 static bool getConstantValue(SDValue N, uint32_t &Out) { 378 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 379 Out = C->getAPIntValue().getZExtValue(); 380 return true; 381 } 382 383 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 384 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 385 return true; 386 } 387 388 return false; 389 } 390 391 void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { 392 EVT VT = N->getValueType(0); 393 unsigned NumVectorElts = VT.getVectorNumElements(); 394 EVT EltVT = VT.getVectorElementType(); 395 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 396 SDLoc DL(N); 397 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 398 399 if (NumVectorElts == 1) { 400 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 401 RegClass); 402 return; 403 } 404 405 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 406 "supported yet"); 407 // 16 = Max Num Vector Elements 408 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 409 // 1 = Vector Register Class 410 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 411 412 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 413 bool IsRegSeq = true; 414 unsigned NOps = N->getNumOperands(); 
415 for (unsigned i = 0; i < NOps; i++) { 416 // XXX: Why is this here? 417 if (isa<RegisterSDNode>(N->getOperand(i))) { 418 IsRegSeq = false; 419 break; 420 } 421 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 422 RegSeqArgs[1 + (2 * i) + 1] = 423 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 424 MVT::i32); 425 } 426 if (NOps != NumVectorElts) { 427 // Fill in the missing undef elements if this was a scalar_to_vector. 428 assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 429 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 430 DL, EltVT); 431 for (unsigned i = NOps; i < NumVectorElts; ++i) { 432 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 433 RegSeqArgs[1 + (2 * i) + 1] = 434 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 435 } 436 } 437 438 if (!IsRegSeq) 439 SelectCode(N); 440 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 441 } 442 443 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 444 unsigned int Opc = N->getOpcode(); 445 if (N->isMachineOpcode()) { 446 N->setNodeId(-1); 447 return; // Already selected. 448 } 449 450 if (isa<AtomicSDNode>(N) || 451 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 452 N = glueCopyToM0(N); 453 454 switch (Opc) { 455 default: break; 456 // We are selecting i64 ADD here instead of custom lower it during 457 // DAG legalization, so we can fold some i64 ADDs used for address 458 // calculation into the LOAD and STORE instructions. 
459 case ISD::ADD: 460 case ISD::ADDC: 461 case ISD::ADDE: 462 case ISD::SUB: 463 case ISD::SUBC: 464 case ISD::SUBE: { 465 if (N->getValueType(0) != MVT::i64) 466 break; 467 468 SelectADD_SUB_I64(N); 469 return; 470 } 471 case ISD::UADDO: 472 case ISD::USUBO: { 473 SelectUADDO_USUBO(N); 474 return; 475 } 476 case AMDGPUISD::FMUL_W_CHAIN: { 477 SelectFMUL_W_CHAIN(N); 478 return; 479 } 480 case AMDGPUISD::FMA_W_CHAIN: { 481 SelectFMA_W_CHAIN(N); 482 return; 483 } 484 485 case ISD::SCALAR_TO_VECTOR: 486 case ISD::BUILD_VECTOR: { 487 EVT VT = N->getValueType(0); 488 unsigned NumVectorElts = VT.getVectorNumElements(); 489 490 if (VT == MVT::v2i16 || VT == MVT::v2f16) { 491 if (Opc == ISD::BUILD_VECTOR) { 492 uint32_t LHSVal, RHSVal; 493 if (getConstantValue(N->getOperand(0), LHSVal) && 494 getConstantValue(N->getOperand(1), RHSVal)) { 495 uint32_t K = LHSVal | (RHSVal << 16); 496 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 497 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 498 return; 499 } 500 } 501 502 break; 503 } 504 505 assert(VT.getVectorElementType().bitsEq(MVT::i32)); 506 unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 507 SelectBuildVector(N, RegClassID); 508 return; 509 } 510 case ISD::BUILD_PAIR: { 511 SDValue RC, SubReg0, SubReg1; 512 SDLoc DL(N); 513 if (N->getValueType(0) == MVT::i128) { 514 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 515 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 516 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 517 } else if (N->getValueType(0) == MVT::i64) { 518 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 519 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 520 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 521 } else { 522 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 523 } 524 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 525 N->getOperand(1), 
SubReg1 }; 526 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 527 N->getValueType(0), Ops)); 528 return; 529 } 530 531 case ISD::Constant: 532 case ISD::ConstantFP: { 533 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 534 break; 535 536 uint64_t Imm; 537 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 538 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 539 else { 540 ConstantSDNode *C = cast<ConstantSDNode>(N); 541 Imm = C->getZExtValue(); 542 } 543 544 SDLoc DL(N); 545 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 546 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 547 MVT::i32)); 548 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 549 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 550 const SDValue Ops[] = { 551 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 552 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 553 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 554 }; 555 556 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 557 N->getValueType(0), Ops)); 558 return; 559 } 560 case ISD::LOAD: 561 case ISD::STORE: { 562 N = glueCopyToM0(N); 563 break; 564 } 565 566 case AMDGPUISD::BFE_I32: 567 case AMDGPUISD::BFE_U32: { 568 // There is a scalar version available, but unlike the vector version which 569 // has a separate operand for the offset and width, the scalar version packs 570 // the width and offset into a single operand. Try to move to the scalar 571 // version if the offsets are constant, so that we can try to keep extended 572 // loads of kernel arguments in SGPRs. 573 574 // TODO: Technically we could try to pattern match scalar bitshifts of 575 // dynamic values, but it's probably not useful. 
576 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 577 if (!Offset) 578 break; 579 580 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 581 if (!Width) 582 break; 583 584 bool Signed = Opc == AMDGPUISD::BFE_I32; 585 586 uint32_t OffsetVal = Offset->getZExtValue(); 587 uint32_t WidthVal = Width->getZExtValue(); 588 589 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 590 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 591 return; 592 } 593 case AMDGPUISD::DIV_SCALE: { 594 SelectDIV_SCALE(N); 595 return; 596 } 597 case ISD::CopyToReg: { 598 const SITargetLowering& Lowering = 599 *static_cast<const SITargetLowering*>(getTargetLowering()); 600 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 601 break; 602 } 603 case ISD::AND: 604 case ISD::SRL: 605 case ISD::SRA: 606 case ISD::SIGN_EXTEND_INREG: 607 if (N->getValueType(0) != MVT::i32) 608 break; 609 610 SelectS_BFE(N); 611 return; 612 case ISD::BRCOND: 613 SelectBRCOND(N); 614 return; 615 case ISD::FMAD: 616 SelectFMAD(N); 617 return; 618 case AMDGPUISD::ATOMIC_CMP_SWAP: 619 SelectATOMIC_CMP_SWAP(N); 620 return; 621 } 622 623 SelectCode(N); 624 } 625 626 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 627 if (!N->readMem()) 628 return false; 629 if (CbId == -1) 630 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; 631 632 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 633 } 634 635 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 636 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 637 const Instruction *Term = BB->getTerminator(); 638 return Term->getMetadata("amdgpu.uniform") || 639 Term->getMetadata("structurizecfg.uniform"); 640 } 641 642 StringRef AMDGPUDAGToDAGISel::getPassName() const { 643 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 644 } 645 646 //===----------------------------------------------------------------------===// 647 // Complex Patterns 
//===----------------------------------------------------------------------===//

// Match a plain constant address: emit it (in dwords) as the pointer itself.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

// Match any non-constant address as a base register plus a zero offset.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

// Base-class stub; only the R600 override actually matches VTX_READ addresses.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

// Split an indirect address into base + constant offset. Always succeeds:
// anything unrecognized becomes (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Pure constant: fold entirely into the offset off INDIRECT_BASE_ADDR.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // DWORDADDR of a constant: same treatment.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, C): peel the constant into the offset.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Select a 64-bit add/sub as two chained 32-bit scalar ops (lo op, then the
// carry-consuming hi op) glued via the SGPR carry, recombined with
// REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; ADDC/SUBC (and ADDE/SUBE) produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Incoming carry (operand 2) feeds the low half via the glue operand.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)   // glue: carry out of the low half
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

// Select the chained FMA node to V_FMA_F32, matching source modifiers for all
// three value operands and threading the chain through.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);   // chain
  Ops[9] = N->getOperand(4);   // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

// Select the chained FMUL node to V_MUL_F32_e64, matching source modifiers.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);   // chain
  Ops[7] = N->getOperand(3);   // glue

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// Check that a DS immediate offset of the given bit width is usable: the
// offset must fit, and pre-CI targets additionally require a provably
// non-negative base (unless unsafe folding is explicitly enabled).
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// Match a DS address as base + 16-bit immediate offset. Always succeeds;
// falls back to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a DS read2/write2-style address: a base register plus two adjacent
// dword-granular offsets (Offset1 == Offset0 + 1), each encodable in 8 bits.
// Always succeeds; falls back to using the whole address as the base with
// offsets 0/1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords; the access is assumed 4-byte aligned.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the negation as an already-selected machine node, since this
          // function runs during instruction selection.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: put a zero in the base register and the whole
    // value into the offsets so the base can be shared between accesses.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// MUBUF immediate offsets are 12-bit unsigned values.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return isUInt<12>(Imm);
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isLegalMUBUFImmOffset(Imm->getZExtValue());
}

// Decompose Addr into the full set of MUBUF addressing operands. Populates
// the ptr/vaddr/soffset/offset outputs and the offen/idxen/addr64/glc/slc/tfe
// flag operands (GLC/SLC are only defaulted to 0 when the caller did not
// pre-populate them). Returns false when the subtarget prefers flat
// instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the addr64 form of a MUBUF access: only valid before Volcanic
// Islands, and only when SelectMUBUF decided the address needs addr64. The
// pointer is wrapped into a full 128-bit resource descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload used by patterns that only care about SLC: GLC/TFE are
// selected but discarded, SLC defaults to 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// Returns true if the memory access is against a stack pseudo-source value
// (i.e. part of a call frame rather than a plain scratch object).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Pick the SGPR a scratch access should be offset against: frame-index
// accesses use the frame offset register, everything else the entry scratch
// wave offset register. Returns the (possibly rewritten) address and that
// register.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Select a scratch (private) access that uses a VGPR address (offen form).
// Always succeeds; splits a constant address into a moved-to-VGPR high part
// and a 12-bit immediate low part when necessary.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Bits above the 12-bit immediate range go into the VGPR address.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Select a scratch access whose whole address is a legal 12-bit immediate
// (no VGPR address needed); fails otherwise.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the offset-only (no vaddr, no idxen, no addr64) MUBUF form, building
// a default resource descriptor around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload discarding the cache-control bits entirely.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Overload keeping only SLC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a 12-bit immediate part and an SOffset
// overflow part, keeping both parts 4-byte aligned so atomics still work.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Small overflows fit an SGPR inline constant; larger ones need s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Buffer-intrinsic offset operand: only constant offsets are handled here.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Buffer-intrinsic offset operand with a VGPR offset component. Fails (so the
// offset-only pattern can match) when a pure constant split is possible.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Select a flat instruction address, folding a constant offset into the
// instruction's immediate field when the subtarget supports flat offsets
// (13-bit signed or 12-bit unsigned depending on IsSigned). Always succeeds.
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

// Encode a constant SMRD byte offset. On success, Imm reports whether the
// offset was encoded as an immediate operand (true) or had to be materialized
// into an SGPR / used as a CI 32-bit literal (false).
bool
AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                     SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Materialize the byte offset into an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Split an SMRD address into a scalar base and an offset. Always succeeds
// (falls back to offset 0).
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an immediate-encodable offset only.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a CI-only 32-bit literal offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD whose offset lives in an SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// SMRD buffer load: bare offset operand, immediate form.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

// SMRD buffer load: bare offset operand, CI 32-bit literal form.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// Split an index expression for v_movrel into a variable base plus a constant
// offset. Fails for a purely constant index (no movrel needed then).
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

// Build an S_BFE_{I,U}32 machine node extracting Width bits starting at
// Offset from Val.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // sra selects the sign-extending form, srl the zero-extending one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

// Try to select shift/mask combinations as a single S_BFE; falls back to
// table-generated selection when no pattern matches.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Returns true if this BRCOND's condition can be selected as a branch on SCC
// (i.e. a single-use SETCC on a scalar-comparable type).
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    // 64-bit scalar compares only exist for eq/ne, and only on some
    // subtargets.
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Select BRCOND into either a scalar (SCC) or vector (VCC) conditional
// branch, copying the condition into the corresponding physical register.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  // SCC branches are only safe when the branch is uniform.
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// Select f32 FMAD as V_MAD_MIX_F32 when any source is a conversion from f16,
// otherwise fall back to normal selection.
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32 unless there is actually an operand using the
  // conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert(!Subtarget->hasFP32Denormals() &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand onto the selected node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The atomic returns the old value packed with the new; extract the half
  // holding the old value.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Fold fneg/fabs wrappers into VOP3 source modifier bits. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

// Like SelectVOP3Mods, but only matches when the (stripped) source is known
// not to be a NaN.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources with no modifiers to fold.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// src0 variant: also supplies default (zero) clamp and omod operands.
bool
AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                    SDValue &SrcMods, SDValue &Clamp,
                                    SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Pass the source through untouched and provide default clamp/omod.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
1854 static SDValue stripExtractLoElt(SDValue In) { 1855 if (In.getOpcode() == ISD::TRUNCATE) { 1856 SDValue Src = In.getOperand(0); 1857 if (Src.getValueType().getSizeInBits() == 32) 1858 return stripBitcast(Src); 1859 } 1860 1861 return In; 1862 } 1863 1864 bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, 1865 SDValue &SrcMods) const { 1866 unsigned Mods = 0; 1867 Src = In; 1868 1869 if (Src.getOpcode() == ISD::FNEG) { 1870 Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); 1871 Src = Src.getOperand(0); 1872 } 1873 1874 if (Src.getOpcode() == ISD::BUILD_VECTOR) { 1875 unsigned VecMods = Mods; 1876 1877 SDValue Lo = stripBitcast(Src.getOperand(0)); 1878 SDValue Hi = stripBitcast(Src.getOperand(1)); 1879 1880 if (Lo.getOpcode() == ISD::FNEG) { 1881 Lo = stripBitcast(Lo.getOperand(0)); 1882 Mods ^= SISrcMods::NEG; 1883 } 1884 1885 if (Hi.getOpcode() == ISD::FNEG) { 1886 Hi = stripBitcast(Hi.getOperand(0)); 1887 Mods ^= SISrcMods::NEG_HI; 1888 } 1889 1890 if (isExtractHiElt(Lo, Lo)) 1891 Mods |= SISrcMods::OP_SEL_0; 1892 1893 if (isExtractHiElt(Hi, Hi)) 1894 Mods |= SISrcMods::OP_SEL_1; 1895 1896 Lo = stripExtractLoElt(Lo); 1897 Hi = stripExtractLoElt(Hi); 1898 1899 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { 1900 // Really a scalar input. Just select from the low half of the register to 1901 // avoid packing. 1902 1903 Src = Lo; 1904 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1905 return true; 1906 } 1907 1908 Mods = VecMods; 1909 } 1910 1911 // Packed instructions do not have abs modifiers. 
1912 Mods |= SISrcMods::OP_SEL_1; 1913 1914 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1915 return true; 1916 } 1917 1918 bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, 1919 SDValue &SrcMods, 1920 SDValue &Clamp) const { 1921 SDLoc SL(In); 1922 1923 // FIXME: Handle clamp and op_sel 1924 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1925 1926 return SelectVOP3PMods(In, Src, SrcMods); 1927 } 1928 1929 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, 1930 SDValue &SrcMods) const { 1931 Src = In; 1932 // FIXME: Handle op_sel 1933 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1934 return true; 1935 } 1936 1937 bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src, 1938 SDValue &SrcMods, 1939 SDValue &Clamp) const { 1940 SDLoc SL(In); 1941 1942 // FIXME: Handle clamp 1943 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1944 1945 return SelectVOP3OpSel(In, Src, SrcMods); 1946 } 1947 1948 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, 1949 SDValue &SrcMods) const { 1950 // FIXME: Handle op_sel 1951 return SelectVOP3Mods(In, Src, SrcMods); 1952 } 1953 1954 bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, 1955 SDValue &SrcMods, 1956 SDValue &Clamp) const { 1957 SDLoc SL(In); 1958 1959 // FIXME: Handle clamp 1960 Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); 1961 1962 return SelectVOP3OpSelMods(In, Src, SrcMods); 1963 } 1964 1965 // The return value is not whether the match is possible (which it always is), 1966 // but whether or not it a conversion is really used. 
// Fold source modifiers for a mad-mix style operand. Returns true when an
// f16->f32 FP_EXTEND was folded away (i.e. a conversion is used); Src and the
// raw SISrcMods bits are returned through the out-parameters either way.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // Fold the ordinary VOP3 neg/abs modifiers first.
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    // Look through an f16 -> f32 extension and read the f16 source directly.
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // XOR so that an inner fneg cancels an outer one.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  // No FP_EXTEND seen: no conversion is used.
  return false;
}

// Wrapper around SelectVOP3PMadMixModsImpl that materializes the modifier
// bits as an i32 target constant. Always matches.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// Repeatedly apply AMDGPUTargetLowering::PostISelFolding to every selected
// machine node in the DAG until a fixed point is reached (a full pass makes
// no replacement).
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        // A replacement was produced; rewrite uses and run another pass, since
        // further folds may now apply to the new node.
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    // Drop any nodes orphaned by the replacements before the next pass.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// R600 instruction selection entry point: handle vector-building nodes by
// choosing an R600 register class, and defer everything else to the
// TableGen-generated matcher (SelectCode).
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
      else
        RegClassID = AMDGPU::R600_Reg128RegClassID;
      break;
    // Only 2- and 4-element vectors are expected here.
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

// Match an address for indirect (register-indexed) access. Always succeeds,
// producing one of four base/offset splits depending on the address shape.
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Constant address: indirect base register + the constant as offset.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // DWORDADDR of a constant: same split as a bare constant.
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, const): split into base value and immediate offset.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    // Fallback: whole address as the base, zero offset.
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Match an address for a VTX_READ: split (add base, imm) when the immediate
// fits in 16 bits, fold a bare small constant into the offset with the ZERO
// register as base, else use the whole address with a zero offset. Always
// succeeds.
// NOTE(review): isInt<16> is tested on a zero-extended value, so negative
// 32-bit constants never match the immediate forms — confirm this is intended.
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    // Read the hardware ZERO register to serve as the base.
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}