1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUISelLowering.h" // For AMDGPUISD 17 #include "AMDGPUInstrInfo.h" 18 #include "AMDGPURegisterInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIDefines.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "SIRegisterInfo.h" 25 #include "llvm/ADT/APInt.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/MachineValueType.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Instruction.h" 40 #include "llvm/MC/MCInstrDesc.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CodeGen.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include <cassert> 46 #include <cstdint> 47 #include <new> 48 #include <vector> 49 50 using namespace llvm; 51 52 namespace llvm { 53 54 class R600InstrInfo; 55 56 } // end namespace llvm 57 58 //===----------------------------------------------------------------------===// 59 // Instruction Selector Implementation 60 
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
///
/// One instance services both the R600 and GCN (SI+) sub-targets; the
/// R600-only folding helpers and the SI-only MUBUF/SMRD/DS/VOP3 complex
/// patterns coexist in this class and are gated at runtime on \c Subtarget.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Refreshed per-function in runOnMachineFunction().
  const AMDGPUSubtarget *Subtarget;
  // Cached address-space numbering for the current target/triple.
  AMDGPUAS AMDGPUASI;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel){
    AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
  }
  ~AMDGPUDAGToDAGISel() override = default;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  // R600-only operand folding helpers (defined elsewhere in this file).
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;

  // Complex-pattern selectors referenced from the generated matcher
  // (AMDGPUGenDAGISel.inc).  Each returns true on a successful match and
  // fills in the by-reference operand SDValues.
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base,
                            SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Root,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Root,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs), clamp, and output-modifier selectors.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;

  // Custom selection for nodes tablegen cannot express (multiple results,
  // glue, or packed operands).
  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Re-query the subtarget for every function: a module may mix functions
  // compiled for different target attributes.
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

/// \returns true if \p N is known not to produce a NaN, either globally
/// (-no-nans-fp-math), via per-node fast-math flags, or by DAG analysis.
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}

/// \returns true if constant node \p N (integer or FP, taken as its raw bit
/// pattern) can be encoded as a hardware inline immediate on this subtarget.
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    // Only CopyToReg carries register-class information pre-selection.
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine node: look the operand's class up in the MCInstrDesc.
    // OpNo is a use-operand index, so skip over the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // Operand 0 is the super-register class ID; the remaining operands come
    // in (value, subreg-index) pairs, so the value for use-operand OpNo is
    // at DAG operand OpNo + 1 and its class is the subclass of the super
    // class that supports the paired subregister index.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

/// On GCN, local (LDS) memory operations implicitly read the m0 register.
/// For a local-address memory node \p N, glue a copy of ~0 into m0 ahead of
/// it (so the maximum LDS size is addressable) and return the morphed node;
/// otherwise return \p N unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Re-emit N with the m0 copy glued on as an extra operand so the scheduler
  // cannot separate the two.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

/// Map a vector element count to the SGPR register class ID wide enough to
/// hold it (32-bit elements assumed by the callers below).
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

/// If \p N is an integer or FP constant, return its low 32 raw bits in
/// \p Out and return true; otherwise return false.
static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}

/// Main selection hook: handle the node kinds that need custom lowering,
/// and fall through to the tablegen-generated matcher for everything else.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // Atomics on LDS also need the glued m0 initialization.
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();

    if (VT == MVT::v2i16 || VT == MVT::v2f16) {
      if (Opc == ISD::BUILD_VECTOR) {
        // Pack two constant halves into a single 32-bit S_MOV_B32.
        // NOTE(review): RHSVal is not masked to 16 bits before the shift;
        // presumably the legalizer only produces 16-bit-clean constants
        // here — confirm before relying on this with wider inputs.
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(EltVT.bitsEq(MVT::i32));

    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      // Degenerate one-element vector: just constrain the scalar's class.
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Build i64/i128 pairs as a REG_SEQUENCE of SGPR halves.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit materialization: inline immediates are left for the generated
    // patterns; everything else becomes two S_MOV_B32 + REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // LDS loads/stores also need m0 set up; then selection proceeds normally.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be expressible as a scalar bitfield extract.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

/// \returns true if memory node \p N reads from the constant address space
/// (CbId == -1) or from the specific R600 constant buffer \p CbId.
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

/// \returns true if the branch terminating the current block was marked
/// uniform (non-divergent) by earlier analysis/structurization metadata.
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

/// Match a wholly-constant global address: emit it as an intptr constant of
/// dwords (hence the division by 4).
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

/// Match any non-constant global address as (base register, offset 0).
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

/// R600 vertex-fetch addressing: split the address into a base register and
/// a 16-bit signed immediate offset.  Always succeeds (falls back to
/// offset 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

/// Indirect-addressing pattern: peel a constant (possibly DWORDADDR-wrapped,
/// or the RHS of an add/or) into the offset; otherwise offset 0.  Always
/// succeeds.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// Expand a 64-bit add/sub (with or without carry in/out) into 32-bit
/// scalar halves: S_ADD/S_SUB on the low dword, the carry-consuming
/// S_ADDC/S_SUBB on the high dword, recombined with REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // Incoming carry (operand 2) feeds the low-half carry-consuming op.
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)   // glue: carry from the low half
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
  // carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
      AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}

/// Select a chained FMA (operand 0 = chain, 1..3 = sources, 4 = glue/extra)
/// to V_FMA_F32, filling in source modifiers, clamp and omod.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

/// Select a chained FMUL to V_MUL_F32_e64, analogous to SelectFMA_W_CHAIN.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

/// \returns true if \p Offset fits the \p OffsetBits-wide unsigned DS-offset
/// field and (pre-CI, unless unsafe folding is enabled) the base pointer is
/// provably non-negative, since SI mis-handles negative bases with offsets.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// Single-address DS addressing: split \p Addr into a base and a 16-bit
/// unsigned immediate offset.  Handles (add base, c), (sub c, x) — rewritten
/// as add(sub 0, x) — and bare constant addresses; always succeeds via the
/// offset-0 fallback.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
// Match a 64-bit, 4-byte-aligned DS access for read2/write2: produce a base
// pointer plus two 8-bit offsets in dword (4-byte) units, where offset1 is
// always offset0 + 1 (the two halves of the 64-bit access).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    // Only the second (larger) offset is range-checked; if it fits in 8 bits,
    // the first one does too.
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the negation directly as a machine node since selection of
          // this node is already in progress.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: materialize a zero base so multiple accesses can
    // share the base register, and put the constant into the offsets.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// MUBUF immediate offsets are 12-bit unsigned.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return isUInt<12>(Imm);
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isLegalMUBUFImmOffset(Imm->getZExtValue());
}

// Decompose Addr into the full set of MUBUF operands (ptr/vaddr/soffset/
// offset plus the offen/idxen/addr64 and glc/slc/tfe flag constants).
// GLC and SLC may already be set by the caller; they are only defaulted to 0
// here if they have no node yet. Always succeeds unless flat instructions are
// preferred for global access on this subtarget.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Match a MUBUF addr64 access: succeeds only when SelectMUBUF chose the
// addr64 form, and wraps the 64-bit pointer into the SRsrc descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Convenience overload without GLC/TFE operands; SLC is forced to 0 before
// delegating, the full overload's GLC/TFE results are discarded.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// True if this memory access is against a stack pseudo source value, i.e. it
// belongs to a call-sequence argument area addressed off the stack pointer.
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Return (address, soffset-register) for a private (scratch) access: a frame
// index resolves to (target FI, frame offset SGPR); anything else is relative
// to the entry point's scratch wave offset register.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Match a scratch (private) access using the offen addressing form:
// Rsrc = scratch resource descriptor, VAddr = per-lane address,
// SOffset = wave/stack offset SGPR, ImmOffset = 12-bit immediate.
// Root is the memory access node, used to query its pointer info.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Split the constant: high bits go into a materialized VGPR, the low 12
    // bits fit the immediate offset field.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Match a scratch access with a pure constant address small enough for the
// immediate offset field (no vaddr needed).
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Match the MUBUF offset-only form (no offen/idxen/addr64): builds a full
// resource descriptor around the pointer via buildRSRC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Overload discarding the GLC/SLC/TFE flags.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Overload keeping only SLC.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a 32-bit constant buffer offset into a 12-bit immediate offset plus
// an SOffset operand holding the overflow (inline constant or S_MOV_B32).
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Buffer-intrinsic offset operand: only constants are matched here.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Buffer-intrinsic offset operand with a VGPR offset component. Fails (so
// the offset-only pattern wins) for constants the hardware handles without a
// voffset, except on <= CI where a hardware bug forces the voffset form.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Match a flat access, folding a 12-bit unsigned constant offset into the
// instruction when the subtarget supports flat instruction offsets.
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
    if (isUInt<12>(COffsetVal)) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

// Flat atomics currently use the same addressing as other flat accesses.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset(Addr, VAddr, Offset, SLC);
}

// Match an SMRD byte offset. On success, Imm reports whether Offset is a
// (generation-encoded) immediate or a materialized SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  // NOTE(review): the isUInt<32>(EncodedOffset) re-check below is always true
  // after the early return above — it looks redundant; confirm before cleanup.
  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Split an SMRD address into a scalar base and an offset (always succeeds;
// falls back to a zero immediate offset).
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an immediate offset only.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a 32-bit literal offset; only Sea Islands encodes this form.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with the offset in an SGPR (non-immediate, non-constant).
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// SMRD buffer load: immediate offset form.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

// SMRD buffer load: 32-bit literal offset form (Sea Islands only).
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD buffer load: SGPR offset form.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Split an index expression for indirect register addressing (MOVREL) into
// a variable base and a constant offset. Pure constants are rejected so a
// more specific pattern can handle them.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // An arithmetic shift picks the signed BFE variant.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  // No match: fall back to the generated matcher.
  SelectCode(N);
}

// Try to select shift/mask/sext_inreg patterns as scalar bitfield extracts.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// Can this brcond be selected to a scalar-condition branch (S_CBRANCH_SCC*)?
// Requires a single-use setcc condition over i32, or over i64 eq/ne when the
// subtarget has 64-bit scalar compares.
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Select a conditional branch: undef condition, scalar-condition branch via
// tablegen, or a vector condition copied into VCC for S_CBRANCH_VCCNZ.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    // Flat-address cmpxchg is handled by the generated matcher.
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the original memory operand to the new machine node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the old value in the low half of a wider
  // register pair; extract just the data-sized subregister.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Fold fneg/fabs wrapping the source into VOP3 source-modifier bits.
// Always succeeds; Src is the stripped value, SrcMods the modifier mask.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// Like SelectVOP3Mods, but only matches when the source is known not to be
// a NaN (or the function allows assuming so).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Match a source with no fneg/fabs modifiers at all.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// SelectVOP3Mods plus zeroed clamp and omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

// NOTE(review): clamp/omod are built as MVT::i32 here but MVT::i1 in the
// sibling selectors — presumably matching different operand definitions in
// the .td patterns; confirm before unifying.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// Source without modifiers, but with zeroed clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

// Look through a bitcast to the underlying value.
static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
// If In is a truncate of a 32-bit value, return that value (through any
// bitcast); otherwise return In unchanged.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

// Fold negation and half-selection (op_sel) patterns on a packed (VOP3P)
// source into the source-modifier bits. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    // A full-vector fneg negates both halves.
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Remember the modifiers so far in case the build_vector doesn't fold.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // An element taken from the high half of a dword selects op_sel for that
    // position instead of needing a shift.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    // The build_vector didn't simplify; discard the per-half modifiers.
    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.

  // OP_SEL_1 here selects the high half for the second operand position,
  // which is the normal (unswizzled) configuration for packed sources.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// SelectVOP3PMods plus a zeroed clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

// Pass the source through with empty op_sel modifiers.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

// SelectVOP3OpSel plus a zeroed clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

// op_sel-with-modifiers source: currently only folds fneg/fabs.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

// SelectVOP3OpSelMods plus a zeroed clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// After instruction selection, repeatedly run target post-ISel folding over
// all machine nodes until a fixed point is reached, removing dead nodes
// after each pass.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}