1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUISelLowering.h" // For AMDGPUISD 17 #include "AMDGPUInstrInfo.h" 18 #include "AMDGPURegisterInfo.h" 19 #include "AMDGPUSubtarget.h" 20 #include "SIDefines.h" 21 #include "SIISelLowering.h" 22 #include "SIInstrInfo.h" 23 #include "SIMachineFunctionInfo.h" 24 #include "SIRegisterInfo.h" 25 #include "llvm/ADT/APInt.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/MachineValueType.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Instruction.h" 40 #include "llvm/MC/MCInstrDesc.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CodeGen.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include "llvm/Support/MathExtras.h" 45 #include <cassert> 46 #include <cstdint> 47 #include <new> 48 #include <vector> 49 50 using namespace llvm; 51 52 namespace llvm { 53 54 class R600InstrInfo; 55 56 } // end namespace llvm 57 58 //===----------------------------------------------------------------------===// 59 // Instruction Selector Implementation 60 
//===----------------------------------------------------------------------===// 61 62 namespace { 63 64 /// AMDGPU specific code to select AMDGPU machine instructions for 65 /// SelectionDAG operations. 66 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 67 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 68 // make the right decision when generating code for different targets. 69 const AMDGPUSubtarget *Subtarget; 70 AMDGPUAS AMDGPUASI; 71 72 public: 73 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) 74 : SelectionDAGISel(TM, OptLevel){ 75 AMDGPUASI = AMDGPU::getAMDGPUAS(TM); 76 } 77 ~AMDGPUDAGToDAGISel() override = default; 78 79 bool runOnMachineFunction(MachineFunction &MF) override; 80 void Select(SDNode *N) override; 81 StringRef getPassName() const override; 82 void PostprocessISelDAG() override; 83 84 private: 85 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; 86 bool isNoNanSrc(SDValue N) const; 87 bool isInlineImmediate(const SDNode *N) const; 88 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 89 const R600InstrInfo *TII); 90 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 91 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 92 93 bool isConstantLoad(const MemSDNode *N, int cbID) const; 94 bool isUniformBr(const SDNode *N) const; 95 96 SDNode *glueCopyToM0(SDNode *N) const; 97 98 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 99 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 100 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 101 SDValue& Offset); 102 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 103 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 104 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 105 unsigned OffsetBits) const; 106 bool SelectDS1Addr1Offset(SDValue Ptr, 
SDValue &Base, SDValue &Offset) const; 107 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 108 SDValue &Offset1) const; 109 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 110 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 111 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 112 SDValue &TFE) const; 113 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 114 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 115 SDValue &SLC, SDValue &TFE) const; 116 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 117 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 118 SDValue &SLC) const; 119 bool SelectMUBUFScratchOffen(SDNode *Root, 120 SDValue Addr, SDValue &RSrc, SDValue &VAddr, 121 SDValue &SOffset, SDValue &ImmOffset) const; 122 bool SelectMUBUFScratchOffset(SDNode *Root, 123 SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 124 SDValue &Offset) const; 125 126 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 127 SDValue &Offset, SDValue &GLC, SDValue &SLC, 128 SDValue &TFE) const; 129 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 130 SDValue &Offset, SDValue &SLC) const; 131 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 132 SDValue &Offset) const; 133 bool SelectMUBUFConstant(SDValue Constant, 134 SDValue &SOffset, 135 SDValue &ImmOffset) const; 136 bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset, 137 SDValue &ImmOffset) const; 138 bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, 139 SDValue &ImmOffset, SDValue &VOffset) const; 140 141 bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, 142 SDValue &Offset, SDValue &SLC) const; 143 bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, 144 SDValue &Offset, SDValue &SLC) const; 145 146 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 147 bool &Imm) const; 148 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 149 bool 
&Imm) const; 150 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 151 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 152 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 153 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 154 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 155 bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 156 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; 157 158 bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; 159 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 160 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; 161 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 162 SDValue &Clamp, SDValue &Omod) const; 163 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 164 SDValue &Clamp, SDValue &Omod) const; 165 166 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 167 SDValue &Clamp, 168 SDValue &Omod) const; 169 170 bool SelectVOP3OMods(SDValue In, SDValue &Src, 171 SDValue &Clamp, SDValue &Omod) const; 172 173 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 174 bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 175 SDValue &Clamp) const; 176 177 void SelectADD_SUB_I64(SDNode *N); 178 void SelectUADDO_USUBO(SDNode *N); 179 void SelectDIV_SCALE(SDNode *N); 180 void SelectFMA_W_CHAIN(SDNode *N); 181 void SelectFMUL_W_CHAIN(SDNode *N); 182 183 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, 184 uint32_t Offset, uint32_t Width); 185 void SelectS_BFEFromShifts(SDNode *N); 186 void SelectS_BFE(SDNode *N); 187 bool isCBranchSCC(const SDNode *N) const; 188 void SelectBRCOND(SDNode *N); 189 void SelectATOMIC_CMP_SWAP(SDNode *N); 190 191 // Include the pieces autogenerated from the target description. 
192 #include "AMDGPUGenDAGISel.inc" 193 }; 194 195 } // end anonymous namespace 196 197 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 198 // DAG, ready for instruction scheduling. 199 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM, 200 CodeGenOpt::Level OptLevel) { 201 return new AMDGPUDAGToDAGISel(TM, OptLevel); 202 } 203 204 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 205 Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); 206 return SelectionDAGISel::runOnMachineFunction(MF); 207 } 208 209 bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { 210 if (TM.Options.NoNaNsFPMath) 211 return true; 212 213 // TODO: Move into isKnownNeverNaN 214 if (N->getFlags().isDefined()) 215 return N->getFlags().hasNoNaNs(); 216 217 return CurDAG->isKnownNeverNaN(N); 218 } 219 220 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 221 const SIInstrInfo *TII 222 = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); 223 224 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 225 return TII->isInlineConstant(C->getAPIntValue()); 226 227 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 228 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 229 230 return false; 231 } 232 233 /// \brief Determine the register class for \p OpNo 234 /// \returns The register class of the virtual register that will be used for 235 /// the given operand number \OpNo or NULL if the register class cannot be 236 /// determined. 
237 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 238 unsigned OpNo) const { 239 if (!N->isMachineOpcode()) { 240 if (N->getOpcode() == ISD::CopyToReg) { 241 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 242 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 243 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 244 return MRI.getRegClass(Reg); 245 } 246 247 const SIRegisterInfo *TRI 248 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 249 return TRI->getPhysRegClass(Reg); 250 } 251 252 return nullptr; 253 } 254 255 switch (N->getMachineOpcode()) { 256 default: { 257 const MCInstrDesc &Desc = 258 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 259 unsigned OpIdx = Desc.getNumDefs() + OpNo; 260 if (OpIdx >= Desc.getNumOperands()) 261 return nullptr; 262 int RegClass = Desc.OpInfo[OpIdx].RegClass; 263 if (RegClass == -1) 264 return nullptr; 265 266 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 267 } 268 case AMDGPU::REG_SEQUENCE: { 269 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 270 const TargetRegisterClass *SuperRC = 271 Subtarget->getRegisterInfo()->getRegClass(RCID); 272 273 SDValue SubRegOp = N->getOperand(OpNo + 1); 274 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 275 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 276 SubRegIdx); 277 } 278 } 279 } 280 281 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 282 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 283 cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) 284 return N; 285 286 const SITargetLowering& Lowering = 287 *static_cast<const SITargetLowering*>(getTargetLowering()); 288 289 // Write max value to m0 before each load operation 290 291 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 292 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 293 294 SDValue 
Glue = M0.getValue(1); 295 296 SmallVector <SDValue, 8> Ops; 297 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 298 Ops.push_back(N->getOperand(i)); 299 } 300 Ops.push_back(Glue); 301 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 302 303 return N; 304 } 305 306 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 307 switch (NumVectorElts) { 308 case 1: 309 return AMDGPU::SReg_32_XM0RegClassID; 310 case 2: 311 return AMDGPU::SReg_64RegClassID; 312 case 4: 313 return AMDGPU::SReg_128RegClassID; 314 case 8: 315 return AMDGPU::SReg_256RegClassID; 316 case 16: 317 return AMDGPU::SReg_512RegClassID; 318 } 319 320 llvm_unreachable("invalid vector size"); 321 } 322 323 static bool getConstantValue(SDValue N, uint32_t &Out) { 324 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 325 Out = C->getAPIntValue().getZExtValue(); 326 return true; 327 } 328 329 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 330 Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); 331 return true; 332 } 333 334 return false; 335 } 336 337 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 338 unsigned int Opc = N->getOpcode(); 339 if (N->isMachineOpcode()) { 340 N->setNodeId(-1); 341 return; // Already selected. 342 } 343 344 if (isa<AtomicSDNode>(N) || 345 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 346 N = glueCopyToM0(N); 347 348 switch (Opc) { 349 default: break; 350 // We are selecting i64 ADD here instead of custom lower it during 351 // DAG legalization, so we can fold some i64 ADDs used for address 352 // calculation into the LOAD and STORE instructions. 
353 case ISD::ADD: 354 case ISD::ADDC: 355 case ISD::ADDE: 356 case ISD::SUB: 357 case ISD::SUBC: 358 case ISD::SUBE: { 359 if (N->getValueType(0) != MVT::i64 || 360 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 361 break; 362 363 SelectADD_SUB_I64(N); 364 return; 365 } 366 case ISD::UADDO: 367 case ISD::USUBO: { 368 SelectUADDO_USUBO(N); 369 return; 370 } 371 case AMDGPUISD::FMUL_W_CHAIN: { 372 SelectFMUL_W_CHAIN(N); 373 return; 374 } 375 case AMDGPUISD::FMA_W_CHAIN: { 376 SelectFMA_W_CHAIN(N); 377 return; 378 } 379 380 case ISD::SCALAR_TO_VECTOR: 381 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 382 case ISD::BUILD_VECTOR: { 383 unsigned RegClassID; 384 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 385 EVT VT = N->getValueType(0); 386 unsigned NumVectorElts = VT.getVectorNumElements(); 387 EVT EltVT = VT.getVectorElementType(); 388 389 if (VT == MVT::v2i16 || VT == MVT::v2f16) { 390 if (Opc == ISD::BUILD_VECTOR) { 391 uint32_t LHSVal, RHSVal; 392 if (getConstantValue(N->getOperand(0), LHSVal) && 393 getConstantValue(N->getOperand(1), RHSVal)) { 394 uint32_t K = LHSVal | (RHSVal << 16); 395 CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, 396 CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); 397 return; 398 } 399 } 400 401 break; 402 } 403 404 assert(EltVT.bitsEq(MVT::i32)); 405 406 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 407 RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 408 } else { 409 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 410 // that adds a 128 bits reg copy when going through TwoAddressInstructions 411 // pass. We want to avoid 128 bits copies as much as possible because they 412 // can't be bundled by our scheduler. 
413 switch(NumVectorElts) { 414 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 415 case 4: 416 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 417 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 418 else 419 RegClassID = AMDGPU::R600_Reg128RegClassID; 420 break; 421 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 422 } 423 } 424 425 SDLoc DL(N); 426 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 427 428 if (NumVectorElts == 1) { 429 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 430 RegClass); 431 return; 432 } 433 434 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 435 "supported yet"); 436 // 16 = Max Num Vector Elements 437 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 438 // 1 = Vector Register Class 439 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 440 441 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 442 bool IsRegSeq = true; 443 unsigned NOps = N->getNumOperands(); 444 for (unsigned i = 0; i < NOps; i++) { 445 // XXX: Why is this here? 446 if (isa<RegisterSDNode>(N->getOperand(i))) { 447 IsRegSeq = false; 448 break; 449 } 450 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 451 RegSeqArgs[1 + (2 * i) + 1] = 452 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 453 MVT::i32); 454 } 455 456 if (NOps != NumVectorElts) { 457 // Fill in the missing undef elements if this was a scalar_to_vector. 
458 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 459 460 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 461 DL, EltVT); 462 for (unsigned i = NOps; i < NumVectorElts; ++i) { 463 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 464 RegSeqArgs[1 + (2 * i) + 1] = 465 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 466 } 467 } 468 469 if (!IsRegSeq) 470 break; 471 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 472 return; 473 } 474 case ISD::BUILD_PAIR: { 475 SDValue RC, SubReg0, SubReg1; 476 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 477 break; 478 } 479 SDLoc DL(N); 480 if (N->getValueType(0) == MVT::i128) { 481 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 482 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 483 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 484 } else if (N->getValueType(0) == MVT::i64) { 485 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 486 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 487 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 488 } else { 489 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 490 } 491 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 492 N->getOperand(1), SubReg1 }; 493 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 494 N->getValueType(0), Ops)); 495 return; 496 } 497 498 case ISD::Constant: 499 case ISD::ConstantFP: { 500 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 501 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 502 break; 503 504 uint64_t Imm; 505 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 506 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 507 else { 508 ConstantSDNode *C = cast<ConstantSDNode>(N); 509 Imm = C->getZExtValue(); 510 } 511 512 SDLoc DL(N); 513 
SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 514 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 515 MVT::i32)); 516 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 517 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 518 const SDValue Ops[] = { 519 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 520 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 521 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 522 }; 523 524 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 525 N->getValueType(0), Ops)); 526 return; 527 } 528 case ISD::LOAD: 529 case ISD::STORE: { 530 N = glueCopyToM0(N); 531 break; 532 } 533 534 case AMDGPUISD::BFE_I32: 535 case AMDGPUISD::BFE_U32: { 536 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 537 break; 538 539 // There is a scalar version available, but unlike the vector version which 540 // has a separate operand for the offset and width, the scalar version packs 541 // the width and offset into a single operand. Try to move to the scalar 542 // version if the offsets are constant, so that we can try to keep extended 543 // loads of kernel arguments in SGPRs. 544 545 // TODO: Technically we could try to pattern match scalar bitshifts of 546 // dynamic values, but it's probably not useful. 547 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 548 if (!Offset) 549 break; 550 551 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 552 if (!Width) 553 break; 554 555 bool Signed = Opc == AMDGPUISD::BFE_I32; 556 557 uint32_t OffsetVal = Offset->getZExtValue(); 558 uint32_t WidthVal = Width->getZExtValue(); 559 560 ReplaceNode(N, getS_BFE(Signed ? 
AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 561 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 562 return; 563 } 564 case AMDGPUISD::DIV_SCALE: { 565 SelectDIV_SCALE(N); 566 return; 567 } 568 case ISD::CopyToReg: { 569 const SITargetLowering& Lowering = 570 *static_cast<const SITargetLowering*>(getTargetLowering()); 571 N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); 572 break; 573 } 574 case ISD::AND: 575 case ISD::SRL: 576 case ISD::SRA: 577 case ISD::SIGN_EXTEND_INREG: 578 if (N->getValueType(0) != MVT::i32 || 579 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 580 break; 581 582 SelectS_BFE(N); 583 return; 584 case ISD::BRCOND: 585 SelectBRCOND(N); 586 return; 587 588 case AMDGPUISD::ATOMIC_CMP_SWAP: 589 SelectATOMIC_CMP_SWAP(N); 590 return; 591 } 592 593 SelectCode(N); 594 } 595 596 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 597 if (!N->readMem()) 598 return false; 599 if (CbId == -1) 600 return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; 601 602 return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; 603 } 604 605 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 606 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 607 const Instruction *Term = BB->getTerminator(); 608 return Term->getMetadata("amdgpu.uniform") || 609 Term->getMetadata("structurizecfg.uniform"); 610 } 611 612 StringRef AMDGPUDAGToDAGISel::getPassName() const { 613 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 614 } 615 616 //===----------------------------------------------------------------------===// 617 // Complex Patterns 618 //===----------------------------------------------------------------------===// 619 620 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 621 SDValue& IntPtr) { 622 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 623 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 624 true); 625 return true; 626 } 627 
return false; 628 } 629 630 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 631 SDValue& BaseReg, SDValue &Offset) { 632 if (!isa<ConstantSDNode>(Addr)) { 633 BaseReg = Addr; 634 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 635 return true; 636 } 637 return false; 638 } 639 640 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 641 SDValue &Offset) { 642 ConstantSDNode *IMMOffset; 643 644 if (Addr.getOpcode() == ISD::ADD 645 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 646 && isInt<16>(IMMOffset->getZExtValue())) { 647 648 Base = Addr.getOperand(0); 649 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 650 MVT::i32); 651 return true; 652 // If the pointer address is constant, we can move it to the offset field. 653 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 654 && isInt<16>(IMMOffset->getZExtValue())) { 655 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 656 SDLoc(CurDAG->getEntryNode()), 657 AMDGPU::ZERO, MVT::i32); 658 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 659 MVT::i32); 660 return true; 661 } 662 663 // Default case, no offset 664 Base = Addr; 665 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 666 return true; 667 } 668 669 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 670 SDValue &Offset) { 671 ConstantSDNode *C; 672 SDLoc DL(Addr); 673 674 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 675 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 676 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 677 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && 678 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { 679 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 680 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 681 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 682 (C 
= dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 683 Base = Addr.getOperand(0); 684 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 685 } else { 686 Base = Addr; 687 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 688 } 689 690 return true; 691 } 692 693 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 694 SDLoc DL(N); 695 SDValue LHS = N->getOperand(0); 696 SDValue RHS = N->getOperand(1); 697 698 unsigned Opcode = N->getOpcode(); 699 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); 700 bool ProduceCarry = 701 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; 702 bool IsAdd = 703 (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE); 704 705 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 706 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 707 708 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 709 DL, MVT::i32, LHS, Sub0); 710 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 711 DL, MVT::i32, LHS, Sub1); 712 713 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 714 DL, MVT::i32, RHS, Sub0); 715 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 716 DL, MVT::i32, RHS, Sub1); 717 718 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 719 720 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 721 unsigned CarryOpc = IsAdd ? 
AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 722 723 SDNode *AddLo; 724 if (!ConsumeCarry) { 725 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 726 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); 727 } else { 728 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; 729 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); 730 } 731 SDValue AddHiArgs[] = { 732 SDValue(Hi0, 0), 733 SDValue(Hi1, 0), 734 SDValue(AddLo, 1) 735 }; 736 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); 737 738 SDValue RegSequenceArgs[] = { 739 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 740 SDValue(AddLo,0), 741 Sub0, 742 SDValue(AddHi,0), 743 Sub1, 744 }; 745 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, 746 MVT::i64, RegSequenceArgs); 747 748 if (ProduceCarry) { 749 // Replace the carry-use 750 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1)); 751 } 752 753 // Replace the remaining uses. 754 CurDAG->ReplaceAllUsesWith(N, RegSequence); 755 CurDAG->RemoveDeadNode(N); 756 } 757 758 void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { 759 // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned 760 // carry out despite the _i32 name. These were renamed in VI to _U32. 761 // FIXME: We should probably rename the opcodes here. 762 unsigned Opc = N->getOpcode() == ISD::UADDO ? 
763 AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64; 764 765 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), 766 { N->getOperand(0), N->getOperand(1) }); 767 } 768 769 void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { 770 SDLoc SL(N); 771 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod 772 SDValue Ops[10]; 773 774 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); 775 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); 776 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); 777 Ops[8] = N->getOperand(0); 778 Ops[9] = N->getOperand(4); 779 780 CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops); 781 } 782 783 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { 784 SDLoc SL(N); 785 // src0_modifiers, src0, src1_modifiers, src1, clamp, omod 786 SDValue Ops[8]; 787 788 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); 789 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); 790 Ops[6] = N->getOperand(0); 791 Ops[7] = N->getOperand(3); 792 793 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); 794 } 795 796 // We need to handle this here because tablegen doesn't support matching 797 // instructions with multiple outputs. 798 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 799 SDLoc SL(N); 800 EVT VT = N->getValueType(0); 801 802 assert(VT == MVT::f32 || VT == MVT::f64); 803 804 unsigned Opc 805 = (VT == MVT::f64) ? 
AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 806 807 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; 808 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); 809 } 810 811 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 812 unsigned OffsetBits) const { 813 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 814 (OffsetBits == 8 && !isUInt<8>(Offset))) 815 return false; 816 817 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 818 Subtarget->unsafeDSOffsetFoldingEnabled()) 819 return true; 820 821 // On Southern Islands instruction with a negative base value and an offset 822 // don't seem to work. 823 return CurDAG->SignBitIsZero(Base); 824 } 825 826 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 827 SDValue &Offset) const { 828 SDLoc DL(Addr); 829 if (CurDAG->isBaseWithConstantOffset(Addr)) { 830 SDValue N0 = Addr.getOperand(0); 831 SDValue N1 = Addr.getOperand(1); 832 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 833 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 834 // (add n0, c0) 835 Base = N0; 836 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 837 return true; 838 } 839 } else if (Addr.getOpcode() == ISD::SUB) { 840 // sub C, x -> add (sub 0, x), C 841 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 842 int64_t ByteOffset = C->getSExtValue(); 843 if (isUInt<16>(ByteOffset)) { 844 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 845 846 // XXX - This is kind of hacky. Create a dummy sub node so we can check 847 // the known bits in isDSOffsetLegal. We need to emit the selected node 848 // here, so this is thrown away. 
849 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 850 Zero, Addr.getOperand(1)); 851 852 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 853 MachineSDNode *MachineSub 854 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 855 Zero, Addr.getOperand(1)); 856 857 Base = SDValue(MachineSub, 0); 858 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); 859 return true; 860 } 861 } 862 } 863 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 864 // If we have a constant address, prefer to put the constant into the 865 // offset. This can save moves to load the constant address since multiple 866 // operations can share the zero base address register, and enables merging 867 // into read2 / write2 instructions. 868 869 SDLoc DL(Addr); 870 871 if (isUInt<16>(CAddr->getZExtValue())) { 872 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 873 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 874 DL, MVT::i32, Zero); 875 Base = SDValue(MovZero, 0); 876 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 877 return true; 878 } 879 } 880 881 // default case 882 Base = Addr; 883 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 884 return true; 885 } 886 887 // TODO: If offset is too big, put low 16-bit into offset. 
// Match a 64-bit, 4-byte aligned DS address (used by read2/write2 style
// instructions) as a base register plus two 8-bit offsets measured in dword
// units: offset0 = c/4 and offset1 = c/4 + 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords; both slots must fit in 8 bits.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Materialize (0 - x) with the already-selected machine opcode.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: fold it entirely into the offsets and use a
    // zero base register (shareable across multiple accesses).
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

// True if Imm fits in the 12-bit unsigned MUBUF immediate offset field.
static bool isLegalMUBUFImmOffset(unsigned Imm) {
  return isUInt<12>(Imm);
}

// Convenience overload for a constant DAG node.
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isLegalMUBUFImmOffset(Imm->getZExtValue());
}

// Decompose Addr into the full set of MUBUF operands (ptr/vaddr/soffset/
// immediate offset plus the offen/idxen/addr64/glc/slc/tfe flag operands).
// GLC/SLC are only given defaults when the caller did not pre-populate them.
// Returns false when flat instructions are preferred for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

// Select the addr64 MUBUF addressing form; only succeeds when SelectMUBUF
// chose addr64 and the subtarget still supports it.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    // Wrap the pointer into a full 128-bit resource descriptor.
    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

// Variant without GLC/TFE outputs; forces SLC = 0 before delegating.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

// True if the access is to a stack pseudo source value (i.e. part of a call
// sequence's argument area).
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

// Pick the (address, soffset-register) pair for a private access: frame
// indexes are relative to the frame offset register, everything else to the
// entry's scratch wave offset register.
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

// Select the scratch (private) MUBUF form with a VGPR address (offen).
// Root is the memory access node, used to decide which SGPR offset register
// the access is relative to.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();
    assert(!isLegalMUBUFImmOffset(Imm) &&
           "should have been selected by other pattern");

    // Split the constant: bits above the 12-bit field go into a materialized
    // VGPR, the low 12 bits stay in the immediate.
    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

// Select the scratch MUBUF form without a VGPR address: only matches a
// constant address that fits entirely in the 12-bit immediate.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Root)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

// Select the offset-only MUBUF form (no vaddr): requires SelectMUBUF to have
// chosen neither offen, idxen nor addr64, then builds a default resource
// descriptor around the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

// Variant discarding the GLC/SLC/TFE outputs.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Variant keeping only the SLC output.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

// Split a constant buffer offset into a 12-bit immediate plus an SOffset
// overflow value, trying to keep SOffset reusable across adjacent loads.
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

// Buffer-intrinsic offset operand: only constant offsets are handled.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

// Buffer-intrinsic offset operand with a VGPR offset component.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

// Flat addressing: peel off a constant offset into the instruction's 12-bit
// offset field when the subtarget supports flat instruction offsets.
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue();
    if (isUInt<12>(COffsetVal)) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

// Flat atomics share the plain flat-offset selection.
bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset(Addr, VAddr, Offset, SLC);
}

// Encode a constant byte offset for an SMRD/SMEM access. Sets Imm to true
// when the offset fits the instruction's immediate field; otherwise the
// offset is materialized (SGPR or CI 32-bit literal) and Imm is false.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Fall back to loading the offset into an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

// Split an SMRD address into a scalar base and an offset operand.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

// SMRD with an immediate-encodable offset only.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

// SMRD with a 32-bit literal offset (Sea Islands only).
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

// SMRD with the offset in an SGPR (non-immediate, non-constant operand).
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Buffer-SMRD variants: same offset classification, no base register.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// Split a MOVREL index into a variable base and a constant offset. A purely
// constant index is rejected so tablegen patterns handle it instead.
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

// Build an S_BFE_I32/S_BFE_U32 machine node for a bitfield extract of Val.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA yields a signed extract, SRL an unsigned one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

// Recognize shift/mask/sign-extend idioms that map onto a scalar bitfield
// extract; fall back to generated matcher code otherwise.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

// True if this BRCOND can be lowered as a scalar (SCC-based) conditional
// branch: a single-use SETCC condition on i32, or on i64 when the subtarget
// has 64-bit scalar compares (eq/ne only).
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

// Lower BRCOND: undef condition -> SI_BR_UNDEF, scalar condition -> tablegen
// S_CBRANCH_SCC*, otherwise copy the condition into VCC and branch on it.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  SDLoc SL(N);

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    // Flat address space uses the ordinary tablegen patterns.
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Carry over the memory operand from the original node.
  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  // The instruction returns the old {data, cmp} pair; the result the IR
  // expects is only the data half, extracted via subregister.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

// Fold fneg/fabs wrappers on In into VOP3 source-modifier bits.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// As SelectVOP3Mods, but only matches when the stripped source is known not
// to produce NaNs.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

// Matches only sources without fneg/fabs wrappers (no modifiers needed).
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

// Source modifiers plus default (zero) clamp and omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

// Variant emitting i32-typed zero clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

// No source modifiers; only supplies default clamp/omod operands.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

// Peel a single bitcast wrapper, if present.
static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  // A truncate of a full 32-bit value keeps the same low 16 bits, so the
  // truncate (and any bitcast under it) can be looked through.
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

// Fold negation and high/low-half selection on a packed (two-element) source
// into VOP3P source-modifier bits (NEG, NEG_HI, OP_SEL_0, OP_SEL_1).
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    // Whole-vector negation toggles the neg bit for both halves.
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    // Save the modifiers so far; they are restored if the per-element
    // analysis below doesn't pan out.
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    // If an element is really the high half of a dword, select it via op_sel
    // instead of emitting an extract.
    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// VOP3P modifiers plus a default (zero) clamp operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

// After instruction selection, repeatedly run the target's post-ISel folding
// over all machine nodes until no more changes are made (fixpoint).
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}