1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPUIntrinsicInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUSubtarget.h" 19 #include "SIISelLowering.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/CodeGen/FunctionLoweringInfo.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/PseudoSourceValue.h" 25 #include "llvm/CodeGen/SelectionDAG.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/IR/DiagnosticInfo.h" 28 29 using namespace llvm; 30 31 namespace llvm { 32 class R600InstrInfo; 33 } 34 35 //===----------------------------------------------------------------------===// 36 // Instruction Selector Implementation 37 //===----------------------------------------------------------------------===// 38 39 namespace { 40 41 /// AMDGPU specific code to select AMDGPU machine instructions for 42 /// SelectionDAG operations. 43 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 44 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 45 // make the right decision when generating code for different targets. 46 const AMDGPUSubtarget *Subtarget; 47 48 public: 49 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) 50 : SelectionDAGISel(TM, OptLevel) {} 51 52 virtual ~AMDGPUDAGToDAGISel(); 53 bool runOnMachineFunction(MachineFunction &MF) override; 54 void Select(SDNode *N) override; 55 StringRef getPassName() const override; 56 void PostprocessISelDAG() override; 57 58 private: 59 SDValue foldFrameIndex(SDValue N) const; 60 bool isInlineImmediate(const SDNode *N) const; 61 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 62 const R600InstrInfo *TII); 63 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 64 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 65 66 bool isConstantLoad(const MemSDNode *N, int cbID) const; 67 bool isUniformBr(const SDNode *N) const; 68 69 SDNode *glueCopyToM0(SDNode *N) const; 70 71 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 72 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 73 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 74 SDValue& Offset); 75 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 76 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 77 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 78 unsigned OffsetBits) const; 79 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 80 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 81 SDValue &Offset1) const; 82 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 83 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 84 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 85 SDValue &TFE) const; 86 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 87 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 88 SDValue &SLC, SDValue &TFE) const; 89 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 90 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 91 SDValue &SLC) const; 92 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 93 SDValue &SOffset, SDValue &ImmOffset) const; 94 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 95 SDValue &Offset, SDValue &GLC, SDValue &SLC, 96 SDValue &TFE) const; 97 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 98 SDValue &Offset, SDValue &SLC) const; 99 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 100 SDValue &Offset) const; 101 bool SelectMUBUFConstant(SDValue Constant, 102 SDValue &SOffset, 103 SDValue &ImmOffset) const; 104 bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset, 105 SDValue &ImmOffset) const; 106 bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, 107 SDValue &ImmOffset, SDValue &VOffset) const; 108 109 bool SelectFlat(SDValue Addr, SDValue &VAddr, 110 SDValue &SLC, SDValue &TFE) const; 111 112 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 113 bool &Imm) const; 114 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 115 bool &Imm) const; 116 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 117 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 118 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 119 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 120 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 121 bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 122 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; 123 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 124 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 125 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 126 SDValue &Clamp, SDValue &Omod) const; 127 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 128 SDValue &Clamp, SDValue &Omod) const; 129 130 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 131 SDValue &Omod) const; 132 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 133 SDValue &Clamp, 134 SDValue &Omod) const; 135 136 void SelectADD_SUB_I64(SDNode *N); 137 void SelectDIV_SCALE(SDNode *N); 138 139 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, 140 uint32_t Offset, uint32_t Width); 141 void SelectS_BFEFromShifts(SDNode *N); 142 void SelectS_BFE(SDNode *N); 143 bool isCBranchSCC(const SDNode *N) const; 144 void SelectBRCOND(SDNode *N); 145 void SelectATOMIC_CMP_SWAP(SDNode *N); 146 147 // Include the pieces autogenerated from the target description. 148 #include "AMDGPUGenDAGISel.inc" 149 }; 150 } // end anonymous namespace 151 152 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 153 // DAG, ready for instruction scheduling. 154 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM, 155 CodeGenOpt::Level OptLevel) { 156 return new AMDGPUDAGToDAGISel(TM, OptLevel); 157 } 158 159 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 160 Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); 161 return SelectionDAGISel::runOnMachineFunction(MF); 162 } 163 164 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 165 } 166 167 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 168 const SIInstrInfo *TII 169 = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); 170 171 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 172 return TII->isInlineConstant(C->getAPIntValue()); 173 174 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 175 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 176 177 return false; 178 } 179 180 /// \brief Determine the register class for \p OpNo 181 /// \returns The register class of the virtual register that will be used for 182 /// the given operand number \OpNo or NULL if the register class cannot be 183 /// determined. 184 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 185 unsigned OpNo) const { 186 if (!N->isMachineOpcode()) 187 return nullptr; 188 189 switch (N->getMachineOpcode()) { 190 default: { 191 const MCInstrDesc &Desc = 192 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 193 unsigned OpIdx = Desc.getNumDefs() + OpNo; 194 if (OpIdx >= Desc.getNumOperands()) 195 return nullptr; 196 int RegClass = Desc.OpInfo[OpIdx].RegClass; 197 if (RegClass == -1) 198 return nullptr; 199 200 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 201 } 202 case AMDGPU::REG_SEQUENCE: { 203 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 204 const TargetRegisterClass *SuperRC = 205 Subtarget->getRegisterInfo()->getRegClass(RCID); 206 207 SDValue SubRegOp = N->getOperand(OpNo + 1); 208 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 209 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 210 SubRegIdx); 211 } 212 } 213 } 214 215 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 216 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 217 cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 218 return N; 219 220 const SITargetLowering& Lowering = 221 *static_cast<const SITargetLowering*>(getTargetLowering()); 222 223 // Write max value to m0 before each load operation 224 225 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 226 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 227 228 SDValue Glue = M0.getValue(1); 229 230 SmallVector <SDValue, 8> Ops; 231 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 232 Ops.push_back(N->getOperand(i)); 233 } 234 Ops.push_back(Glue); 235 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 236 237 return N; 238 } 239 240 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 241 switch (NumVectorElts) { 242 case 1: 243 return AMDGPU::SReg_32RegClassID; 244 case 2: 245 return AMDGPU::SReg_64RegClassID; 246 case 4: 247 return AMDGPU::SReg_128RegClassID; 248 case 8: 249 return AMDGPU::SReg_256RegClassID; 250 case 16: 251 return AMDGPU::SReg_512RegClassID; 252 } 253 254 llvm_unreachable("invalid vector size"); 255 } 256 257 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 258 unsigned int Opc = N->getOpcode(); 259 if (N->isMachineOpcode()) { 260 N->setNodeId(-1); 261 return; // Already selected. 262 } 263 264 if (isa<AtomicSDNode>(N) || 265 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 266 N = glueCopyToM0(N); 267 268 switch (Opc) { 269 default: break; 270 // We are selecting i64 ADD here instead of custom lower it during 271 // DAG legalization, so we can fold some i64 ADDs used for address 272 // calculation into the LOAD and STORE instructions. 273 case ISD::ADD: 274 case ISD::SUB: { 275 if (N->getValueType(0) != MVT::i64 || 276 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 277 break; 278 279 SelectADD_SUB_I64(N); 280 return; 281 } 282 case ISD::SCALAR_TO_VECTOR: 283 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 284 case ISD::BUILD_VECTOR: { 285 unsigned RegClassID; 286 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 287 EVT VT = N->getValueType(0); 288 unsigned NumVectorElts = VT.getVectorNumElements(); 289 EVT EltVT = VT.getVectorElementType(); 290 assert(EltVT.bitsEq(MVT::i32)); 291 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 292 RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 293 } else { 294 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 295 // that adds a 128 bits reg copy when going through TwoAddressInstructions 296 // pass. We want to avoid 128 bits copies as much as possible because they 297 // can't be bundled by our scheduler. 298 switch(NumVectorElts) { 299 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 300 case 4: 301 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 302 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 303 else 304 RegClassID = AMDGPU::R600_Reg128RegClassID; 305 break; 306 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 307 } 308 } 309 310 SDLoc DL(N); 311 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 312 313 if (NumVectorElts == 1) { 314 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 315 RegClass); 316 return; 317 } 318 319 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 320 "supported yet"); 321 // 16 = Max Num Vector Elements 322 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 323 // 1 = Vector Register Class 324 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 325 326 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 327 bool IsRegSeq = true; 328 unsigned NOps = N->getNumOperands(); 329 for (unsigned i = 0; i < NOps; i++) { 330 // XXX: Why is this here? 331 if (isa<RegisterSDNode>(N->getOperand(i))) { 332 IsRegSeq = false; 333 break; 334 } 335 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 336 RegSeqArgs[1 + (2 * i) + 1] = 337 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 338 MVT::i32); 339 } 340 341 if (NOps != NumVectorElts) { 342 // Fill in the missing undef elements if this was a scalar_to_vector. 343 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 344 345 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 346 DL, EltVT); 347 for (unsigned i = NOps; i < NumVectorElts; ++i) { 348 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 349 RegSeqArgs[1 + (2 * i) + 1] = 350 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 351 } 352 } 353 354 if (!IsRegSeq) 355 break; 356 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 357 return; 358 } 359 case ISD::BUILD_PAIR: { 360 SDValue RC, SubReg0, SubReg1; 361 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 362 break; 363 } 364 SDLoc DL(N); 365 if (N->getValueType(0) == MVT::i128) { 366 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 367 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 368 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 369 } else if (N->getValueType(0) == MVT::i64) { 370 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 371 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 372 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 373 } else { 374 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 375 } 376 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 377 N->getOperand(1), SubReg1 }; 378 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 379 N->getValueType(0), Ops)); 380 return; 381 } 382 383 case ISD::Constant: 384 case ISD::ConstantFP: { 385 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 386 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 387 break; 388 389 uint64_t Imm; 390 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 391 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 392 else { 393 ConstantSDNode *C = cast<ConstantSDNode>(N); 394 Imm = C->getZExtValue(); 395 } 396 397 SDLoc DL(N); 398 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 399 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 400 MVT::i32)); 401 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 402 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 403 const SDValue Ops[] = { 404 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 405 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 406 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 407 }; 408 409 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 410 N->getValueType(0), Ops)); 411 return; 412 } 413 case ISD::LOAD: 414 case ISD::STORE: { 415 N = glueCopyToM0(N); 416 break; 417 } 418 419 case AMDGPUISD::BFE_I32: 420 case AMDGPUISD::BFE_U32: { 421 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 422 break; 423 424 // There is a scalar version available, but unlike the vector version which 425 // has a separate operand for the offset and width, the scalar version packs 426 // the width and offset into a single operand. Try to move to the scalar 427 // version if the offsets are constant, so that we can try to keep extended 428 // loads of kernel arguments in SGPRs. 429 430 // TODO: Technically we could try to pattern match scalar bitshifts of 431 // dynamic values, but it's probably not useful. 432 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 433 if (!Offset) 434 break; 435 436 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 437 if (!Width) 438 break; 439 440 bool Signed = Opc == AMDGPUISD::BFE_I32; 441 442 uint32_t OffsetVal = Offset->getZExtValue(); 443 uint32_t WidthVal = Width->getZExtValue(); 444 445 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 446 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 447 return; 448 } 449 case AMDGPUISD::DIV_SCALE: { 450 SelectDIV_SCALE(N); 451 return; 452 } 453 case ISD::CopyToReg: { 454 const SITargetLowering& Lowering = 455 *static_cast<const SITargetLowering*>(getTargetLowering()); 456 Lowering.legalizeTargetIndependentNode(N, *CurDAG); 457 break; 458 } 459 case ISD::AND: 460 case ISD::SRL: 461 case ISD::SRA: 462 case ISD::SIGN_EXTEND_INREG: 463 if (N->getValueType(0) != MVT::i32 || 464 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 465 break; 466 467 SelectS_BFE(N); 468 return; 469 case ISD::BRCOND: 470 SelectBRCOND(N); 471 return; 472 473 case AMDGPUISD::ATOMIC_CMP_SWAP: 474 SelectATOMIC_CMP_SWAP(N); 475 return; 476 } 477 478 SelectCode(N); 479 } 480 481 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 482 if (!N->readMem()) 483 return false; 484 if (CbId == -1) 485 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; 486 487 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; 488 } 489 490 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 491 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 492 const Instruction *Term = BB->getTerminator(); 493 return Term->getMetadata("amdgpu.uniform") || 494 Term->getMetadata("structurizecfg.uniform"); 495 } 496 497 StringRef AMDGPUDAGToDAGISel::getPassName() const { 498 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 499 } 500 501 //===----------------------------------------------------------------------===// 502 // Complex Patterns 503 //===----------------------------------------------------------------------===// 504 505 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 506 SDValue& IntPtr) { 507 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 508 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 509 true); 510 return true; 511 } 512 return false; 513 } 514 515 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 516 SDValue& BaseReg, SDValue &Offset) { 517 if (!isa<ConstantSDNode>(Addr)) { 518 BaseReg = Addr; 519 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 520 return true; 521 } 522 return false; 523 } 524 525 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 526 SDValue &Offset) { 527 ConstantSDNode *IMMOffset; 528 529 if (Addr.getOpcode() == ISD::ADD 530 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 531 && isInt<16>(IMMOffset->getZExtValue())) { 532 533 Base = Addr.getOperand(0); 534 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 535 MVT::i32); 536 return true; 537 // If the pointer address is constant, we can move it to the offset field. 538 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 539 && isInt<16>(IMMOffset->getZExtValue())) { 540 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 541 SDLoc(CurDAG->getEntryNode()), 542 AMDGPU::ZERO, MVT::i32); 543 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 544 MVT::i32); 545 return true; 546 } 547 548 // Default case, no offset 549 Base = Addr; 550 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 551 return true; 552 } 553 554 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 555 SDValue &Offset) { 556 ConstantSDNode *C; 557 SDLoc DL(Addr); 558 559 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 560 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 561 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 562 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 563 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 564 Base = Addr.getOperand(0); 565 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 566 } else { 567 Base = Addr; 568 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 569 } 570 571 return true; 572 } 573 574 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 575 SDLoc DL(N); 576 SDValue LHS = N->getOperand(0); 577 SDValue RHS = N->getOperand(1); 578 579 bool IsAdd = (N->getOpcode() == ISD::ADD); 580 581 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 582 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 583 584 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 585 DL, MVT::i32, LHS, Sub0); 586 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 587 DL, MVT::i32, LHS, Sub1); 588 589 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 590 DL, MVT::i32, RHS, Sub0); 591 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 592 DL, MVT::i32, RHS, Sub1); 593 594 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 595 SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 596 597 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 598 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 599 600 SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); 601 SDValue Carry(AddLo, 1); 602 SDNode *AddHi 603 = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, 604 SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); 605 606 SDValue Args[5] = { 607 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 608 SDValue(AddLo,0), 609 Sub0, 610 SDValue(AddHi,0), 611 Sub1, 612 }; 613 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); 614 } 615 616 // We need to handle this here because tablegen doesn't support matching 617 // instructions with multiple outputs. 618 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 619 SDLoc SL(N); 620 EVT VT = N->getValueType(0); 621 622 assert(VT == MVT::f32 || VT == MVT::f64); 623 624 unsigned Opc 625 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 626 627 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, 628 // omod 629 SDValue Ops[8]; 630 631 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 632 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 633 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 634 CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 635 } 636 637 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 638 unsigned OffsetBits) const { 639 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 640 (OffsetBits == 8 && !isUInt<8>(Offset))) 641 return false; 642 643 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 644 Subtarget->unsafeDSOffsetFoldingEnabled()) 645 return true; 646 647 // On Southern Islands instruction with a negative base value and an offset 648 // don't seem to work. 649 return CurDAG->SignBitIsZero(Base); 650 } 651 652 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 653 SDValue &Offset) const { 654 SDLoc DL(Addr); 655 if (CurDAG->isBaseWithConstantOffset(Addr)) { 656 SDValue N0 = Addr.getOperand(0); 657 SDValue N1 = Addr.getOperand(1); 658 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 659 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 660 // (add n0, c0) 661 Base = N0; 662 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 663 return true; 664 } 665 } else if (Addr.getOpcode() == ISD::SUB) { 666 // sub C, x -> add (sub 0, x), C 667 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 668 int64_t ByteOffset = C->getSExtValue(); 669 if (isUInt<16>(ByteOffset)) { 670 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 671 672 // XXX - This is kind of hacky. Create a dummy sub node so we can check 673 // the known bits in isDSOffsetLegal. We need to emit the selected node 674 // here, so this is thrown away. 675 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 676 Zero, Addr.getOperand(1)); 677 678 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 679 MachineSDNode *MachineSub 680 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 681 Zero, Addr.getOperand(1)); 682 683 Base = SDValue(MachineSub, 0); 684 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); 685 return true; 686 } 687 } 688 } 689 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 690 // If we have a constant address, prefer to put the constant into the 691 // offset. This can save moves to load the constant address since multiple 692 // operations can share the zero base address register, and enables merging 693 // into read2 / write2 instructions. 694 695 SDLoc DL(Addr); 696 697 if (isUInt<16>(CAddr->getZExtValue())) { 698 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 699 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 700 DL, MVT::i32, Zero); 701 Base = SDValue(MovZero, 0); 702 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 703 return true; 704 } 705 } 706 707 // default case 708 Base = Addr; 709 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 710 return true; 711 } 712 713 // TODO: If offset is too big, put low 16-bit into offset. 714 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 715 SDValue &Offset0, 716 SDValue &Offset1) const { 717 SDLoc DL(Addr); 718 719 if (CurDAG->isBaseWithConstantOffset(Addr)) { 720 SDValue N0 = Addr.getOperand(0); 721 SDValue N1 = Addr.getOperand(1); 722 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 723 unsigned DWordOffset0 = C1->getZExtValue() / 4; 724 unsigned DWordOffset1 = DWordOffset0 + 1; 725 // (add n0, c0) 726 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 727 Base = N0; 728 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 729 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 730 return true; 731 } 732 } else if (Addr.getOpcode() == ISD::SUB) { 733 // sub C, x -> add (sub 0, x), C 734 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 735 unsigned DWordOffset0 = C->getZExtValue() / 4; 736 unsigned DWordOffset1 = DWordOffset0 + 1; 737 738 if (isUInt<8>(DWordOffset0)) { 739 SDLoc DL(Addr); 740 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 741 742 // XXX - This is kind of hacky. Create a dummy sub node so we can check 743 // the known bits in isDSOffsetLegal. We need to emit the selected node 744 // here, so this is thrown away. 745 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 746 Zero, Addr.getOperand(1)); 747 748 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 749 MachineSDNode *MachineSub 750 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 751 Zero, Addr.getOperand(1)); 752 753 Base = SDValue(MachineSub, 0); 754 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 755 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 756 return true; 757 } 758 } 759 } 760 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 761 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 762 unsigned DWordOffset1 = DWordOffset0 + 1; 763 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 764 765 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 766 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 767 MachineSDNode *MovZero 768 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 769 DL, MVT::i32, Zero); 770 Base = SDValue(MovZero, 0); 771 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 772 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 773 return true; 774 } 775 } 776 777 // default case 778 779 // FIXME: This is broken on SI where we still need to check if the base 780 // pointer is positive here. 781 Base = Addr; 782 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 783 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 784 return true; 785 } 786 787 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 788 return isUInt<12>(Imm->getZExtValue()); 789 } 790 791 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 792 SDValue &VAddr, SDValue &SOffset, 793 SDValue &Offset, SDValue &Offen, 794 SDValue &Idxen, SDValue &Addr64, 795 SDValue &GLC, SDValue &SLC, 796 SDValue &TFE) const { 797 // Subtarget prefers to use flat instruction 798 if (Subtarget->useFlatForGlobal()) 799 return false; 800 801 SDLoc DL(Addr); 802 803 if (!GLC.getNode()) 804 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 805 if (!SLC.getNode()) 806 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 807 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 808 809 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 810 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 811 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 812 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 813 814 if (CurDAG->isBaseWithConstantOffset(Addr)) { 815 SDValue N0 = Addr.getOperand(0); 816 SDValue N1 = Addr.getOperand(1); 817 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 818 819 if (N0.getOpcode() == ISD::ADD) { 820 // (add (add N2, N3), C1) -> addr64 821 SDValue N2 = N0.getOperand(0); 822 SDValue N3 = N0.getOperand(1); 823 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 824 Ptr = N2; 825 VAddr = N3; 826 } else { 827 828 // (add N0, C1) -> offset 829 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 830 Ptr = N0; 831 } 832 833 if (isLegalMUBUFImmOffset(C1)) { 834 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 835 return true; 836 } 837 838 if (isUInt<32>(C1->getZExtValue())) { 839 // Illegal offset, store it in soffset. 840 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 841 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 842 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 843 0); 844 return true; 845 } 846 } 847 848 if (Addr.getOpcode() == ISD::ADD) { 849 // (add N0, N1) -> addr64 850 SDValue N0 = Addr.getOperand(0); 851 SDValue N1 = Addr.getOperand(1); 852 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 853 Ptr = N0; 854 VAddr = N1; 855 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 856 return true; 857 } 858 859 // default case -> offset 860 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 861 Ptr = Addr; 862 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 863 864 return true; 865 } 866 867 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 868 SDValue &VAddr, SDValue &SOffset, 869 SDValue &Offset, SDValue &GLC, 870 SDValue &SLC, SDValue &TFE) const { 871 SDValue Ptr, Offen, Idxen, Addr64; 872 873 // addr64 bit was removed for volcanic islands. 874 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 875 return false; 876 877 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 878 GLC, SLC, TFE)) 879 return false; 880 881 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 882 if (C->getSExtValue()) { 883 SDLoc DL(Addr); 884 885 const SITargetLowering& Lowering = 886 *static_cast<const SITargetLowering*>(getTargetLowering()); 887 888 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 889 return true; 890 } 891 892 return false; 893 } 894 895 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 896 SDValue &VAddr, SDValue &SOffset, 897 SDValue &Offset, 898 SDValue &SLC) const { 899 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 900 SDValue GLC, TFE; 901 902 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 903 } 904 905 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { 906 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) 907 return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); 908 return N; 909 } 910 911 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 912 SDValue &VAddr, SDValue &SOffset, 913 SDValue &ImmOffset) const { 914 915 SDLoc DL(Addr); 916 MachineFunction &MF = CurDAG->getMachineFunction(); 917 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 918 919 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 920 SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); 921 922 // (add n0, c1) 923 if (CurDAG->isBaseWithConstantOffset(Addr)) { 924 SDValue N0 = Addr.getOperand(0); 925 SDValue N1 = Addr.getOperand(1); 926 927 // Offsets in vaddr must be positive. 928 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 929 if (isLegalMUBUFImmOffset(C1)) { 930 VAddr = foldFrameIndex(N0); 931 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 932 return true; 933 } 934 } 935 936 // (node) 937 VAddr = foldFrameIndex(Addr); 938 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 939 return true; 940 } 941 942 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 943 SDValue &SOffset, SDValue &Offset, 944 SDValue &GLC, SDValue &SLC, 945 SDValue &TFE) const { 946 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 947 const SIInstrInfo *TII = 948 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 949 950 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 951 GLC, SLC, TFE)) 952 return false; 953 954 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 955 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 956 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 957 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 958 APInt::getAllOnesValue(32).getZExtValue(); // Size 959 SDLoc DL(Addr); 960 961 const SITargetLowering& Lowering = 962 *static_cast<const SITargetLowering*>(getTargetLowering()); 963 964 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 965 return true; 966 } 967 return false; 968 } 969 970 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 971 SDValue &Soffset, SDValue &Offset 972 ) const { 973 SDValue GLC, SLC, TFE; 974 975 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 976 } 977 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 978 SDValue &Soffset, SDValue &Offset, 979 SDValue &SLC) const { 980 SDValue GLC, TFE; 981 982 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 983 } 984 985 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, 986 SDValue &SOffset, 987 SDValue &ImmOffset) const { 988 SDLoc DL(Constant); 989 uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); 990 uint32_t Overflow = 0; 991 992 if (Imm >= 4096) { 993 if (Imm <= 4095 + 64) { 994 // Use an SOffset inline constant for 1..64 995 Overflow = Imm - 4095; 996 Imm = 4095; 997 } else { 998 // Try to keep the same value in SOffset for adjacent loads, so that 999 // the corresponding register contents can be re-used. 1000 // 1001 // Load values with all low-bits set into SOffset, so that a larger 1002 // range of values can be covered using s_movk_i32 1003 uint32_t High = (Imm + 1) & ~4095; 1004 uint32_t Low = (Imm + 1) & 4095; 1005 Imm = Low; 1006 Overflow = High - 1; 1007 } 1008 } 1009 1010 // There is a hardware bug in SI and CI which prevents address clamping in 1011 // MUBUF instructions from working correctly with SOffsets. The immediate 1012 // offset is unaffected. 1013 if (Overflow > 0 && 1014 Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 1015 return false; 1016 1017 ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); 1018 1019 if (Overflow <= 64) 1020 SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); 1021 else 1022 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1023 CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), 1024 0); 1025 1026 return true; 1027 } 1028 1029 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, 1030 SDValue &SOffset, 1031 SDValue &ImmOffset) const { 1032 SDLoc DL(Offset); 1033 1034 if (!isa<ConstantSDNode>(Offset)) 1035 return false; 1036 1037 return SelectMUBUFConstant(Offset, SOffset, ImmOffset); 1038 } 1039 1040 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, 1041 SDValue &SOffset, 1042 SDValue &ImmOffset, 1043 SDValue &VOffset) const { 1044 SDLoc DL(Offset); 1045 1046 // Don't generate an unnecessary voffset for constant offsets. 1047 if (isa<ConstantSDNode>(Offset)) { 1048 SDValue Tmp1, Tmp2; 1049 1050 // When necessary, use a voffset in <= CI anyway to work around a hardware 1051 // bug. 1052 if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS || 1053 SelectMUBUFConstant(Offset, Tmp1, Tmp2)) 1054 return false; 1055 } 1056 1057 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1058 SDValue N0 = Offset.getOperand(0); 1059 SDValue N1 = Offset.getOperand(1); 1060 if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 && 1061 SelectMUBUFConstant(N1, SOffset, ImmOffset)) { 1062 VOffset = N0; 1063 return true; 1064 } 1065 } 1066 1067 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1068 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1069 VOffset = Offset; 1070 1071 return true; 1072 } 1073 1074 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, 1075 SDValue &VAddr, 1076 SDValue &SLC, 1077 SDValue &TFE) const { 1078 VAddr = Addr; 1079 TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1080 return true; 1081 } 1082 1083 /// 1084 /// \param EncodedOffset This is the immediate value that will be encoded 1085 /// directly into the instruction. On SI/CI the \p EncodedOffset 1086 /// will be in units of dwords and on VI+ it will be units of bytes. 1087 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1088 int64_t EncodedOffset) { 1089 return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1090 isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1091 } 1092 1093 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1094 SDValue &Offset, bool &Imm) const { 1095 1096 // FIXME: Handle non-constant offsets. 1097 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1098 if (!C) 1099 return false; 1100 1101 SDLoc SL(ByteOffsetNode); 1102 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1103 int64_t ByteOffset = C->getSExtValue(); 1104 int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1105 ByteOffset >> 2 : ByteOffset; 1106 1107 if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1108 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1109 Imm = true; 1110 return true; 1111 } 1112 1113 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1114 return false; 1115 1116 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1117 // 32-bit Immediates are supported on Sea Islands. 1118 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1119 } else { 1120 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1121 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1122 C32Bit), 0); 1123 } 1124 Imm = false; 1125 return true; 1126 } 1127 1128 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1129 SDValue &Offset, bool &Imm) const { 1130 1131 SDLoc SL(Addr); 1132 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1133 SDValue N0 = Addr.getOperand(0); 1134 SDValue N1 = Addr.getOperand(1); 1135 1136 if (SelectSMRDOffset(N1, Offset, Imm)) { 1137 SBase = N0; 1138 return true; 1139 } 1140 } 1141 SBase = Addr; 1142 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1143 Imm = true; 1144 return true; 1145 } 1146 1147 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1148 SDValue &Offset) const { 1149 bool Imm; 1150 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1151 } 1152 1153 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1154 SDValue &Offset) const { 1155 1156 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1157 return false; 1158 1159 bool Imm; 1160 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1161 return false; 1162 1163 return !Imm && isa<ConstantSDNode>(Offset); 1164 } 1165 1166 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1167 SDValue &Offset) const { 1168 bool Imm; 1169 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1170 !isa<ConstantSDNode>(Offset); 1171 } 1172 1173 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1174 SDValue &Offset) const { 1175 bool Imm; 1176 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1177 } 1178 1179 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1180 SDValue &Offset) const { 1181 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1182 return false; 1183 1184 bool Imm; 1185 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1186 return false; 1187 1188 return !Imm && isa<ConstantSDNode>(Offset); 1189 } 1190 1191 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1192 SDValue &Offset) const { 1193 bool Imm; 1194 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1195 !isa<ConstantSDNode>(Offset); 1196 } 1197 1198 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, 1199 SDValue &Base, 1200 SDValue &Offset) const { 1201 SDLoc DL(Index); 1202 1203 if (CurDAG->isBaseWithConstantOffset(Index)) { 1204 SDValue N0 = Index.getOperand(0); 1205 SDValue N1 = Index.getOperand(1); 1206 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1207 1208 // (add n0, c0) 1209 Base = N0; 1210 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); 1211 return true; 1212 } 1213 1214 if (isa<ConstantSDNode>(Index)) 1215 return false; 1216 1217 Base = Index; 1218 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1219 return true; 1220 } 1221 1222 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1223 SDValue Val, uint32_t Offset, 1224 uint32_t Width) { 1225 // Transformation function, pack the offset and width of a BFE into 1226 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1227 // source, bits [5:0] contain the offset and bits [22:16] the width. 1228 uint32_t PackedVal = Offset | (Width << 16); 1229 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1230 1231 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1232 } 1233 1234 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1235 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1236 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1237 // Predicate: 0 < b <= c < 32 1238 1239 const SDValue &Shl = N->getOperand(0); 1240 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1241 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1242 1243 if (B && C) { 1244 uint32_t BVal = B->getZExtValue(); 1245 uint32_t CVal = C->getZExtValue(); 1246 1247 if (0 < BVal && BVal <= CVal && CVal < 32) { 1248 bool Signed = N->getOpcode() == ISD::SRA; 1249 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1250 1251 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, 1252 32 - CVal)); 1253 return; 1254 } 1255 } 1256 SelectCode(N); 1257 } 1258 1259 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1260 switch (N->getOpcode()) { 1261 case ISD::AND: 1262 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1263 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1264 // Predicate: isMask(mask) 1265 const SDValue &Srl = N->getOperand(0); 1266 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1267 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1268 1269 if (Shift && Mask) { 1270 uint32_t ShiftVal = Shift->getZExtValue(); 1271 uint32_t MaskVal = Mask->getZExtValue(); 1272 1273 if (isMask_32(MaskVal)) { 1274 uint32_t WidthVal = countPopulation(MaskVal); 1275 1276 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1277 Srl.getOperand(0), ShiftVal, WidthVal)); 1278 return; 1279 } 1280 } 1281 } 1282 break; 1283 case ISD::SRL: 1284 if (N->getOperand(0).getOpcode() == ISD::AND) { 1285 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1286 // Predicate: isMask(mask >> b) 1287 const SDValue &And = N->getOperand(0); 1288 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1289 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1290 1291 if (Shift && Mask) { 1292 uint32_t ShiftVal = Shift->getZExtValue(); 1293 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1294 1295 if (isMask_32(MaskVal)) { 1296 uint32_t WidthVal = countPopulation(MaskVal); 1297 1298 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1299 And.getOperand(0), ShiftVal, WidthVal)); 1300 return; 1301 } 1302 } 1303 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { 1304 SelectS_BFEFromShifts(N); 1305 return; 1306 } 1307 break; 1308 case ISD::SRA: 1309 if (N->getOperand(0).getOpcode() == ISD::SHL) { 1310 SelectS_BFEFromShifts(N); 1311 return; 1312 } 1313 break; 1314 1315 case ISD::SIGN_EXTEND_INREG: { 1316 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 1317 SDValue Src = N->getOperand(0); 1318 if (Src.getOpcode() != ISD::SRL) 1319 break; 1320 1321 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); 1322 if (!Amt) 1323 break; 1324 1325 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1326 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), 1327 Amt->getZExtValue(), Width)); 1328 return; 1329 } 1330 } 1331 1332 SelectCode(N); 1333 } 1334 1335 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { 1336 assert(N->getOpcode() == ISD::BRCOND); 1337 if (!N->hasOneUse()) 1338 return false; 1339 1340 SDValue Cond = N->getOperand(1); 1341 if (Cond.getOpcode() == ISD::CopyToReg) 1342 Cond = Cond.getOperand(2); 1343 1344 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 1345 return false; 1346 1347 MVT VT = Cond.getOperand(0).getSimpleValueType(); 1348 if (VT == MVT::i32) 1349 return true; 1350 1351 if (VT == MVT::i64) { 1352 auto ST = static_cast<const SISubtarget *>(Subtarget); 1353 1354 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 1355 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); 1356 } 1357 1358 return false; 1359 } 1360 1361 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { 1362 SDValue Cond = N->getOperand(1); 1363 1364 if (isCBranchSCC(N)) { 1365 // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. 1366 SelectCode(N); 1367 return; 1368 } 1369 1370 // The result of VOPC instructions is or'd against ~EXEC before it is 1371 // written to vcc or another SGPR. This means that the value '1' is always 1372 // written to the corresponding bit for results that are masked. In order 1373 // to correctly check against vccz, we need to and VCC with the EXEC 1374 // register in order to clear the value from the masked bits. 1375 1376 SDLoc SL(N); 1377 1378 SDNode *MaskedCond = 1379 CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, 1380 CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), 1381 Cond); 1382 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, 1383 SDValue(MaskedCond, 0), 1384 SDValue()); // Passing SDValue() adds a 1385 // glue output. 1386 CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, 1387 N->getOperand(2), // Basic Block 1388 VCC.getValue(0), // Chain 1389 VCC.getValue(1)); // Glue 1390 return; 1391 } 1392 1393 // This is here because there isn't a way to use the generated sub0_sub1 as the 1394 // subreg index to EXTRACT_SUBREG in tablegen. 1395 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { 1396 MemSDNode *Mem = cast<MemSDNode>(N); 1397 unsigned AS = Mem->getAddressSpace(); 1398 if (AS == AMDGPUAS::FLAT_ADDRESS) { 1399 SelectCode(N); 1400 return; 1401 } 1402 1403 MVT VT = N->getSimpleValueType(0); 1404 bool Is32 = (VT == MVT::i32); 1405 SDLoc SL(N); 1406 1407 MachineSDNode *CmpSwap = nullptr; 1408 if (Subtarget->hasAddr64()) { 1409 SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC; 1410 1411 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { 1412 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 : 1413 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64; 1414 SDValue CmpVal = Mem->getOperand(2); 1415 1416 // XXX - Do we care about glue operands? 1417 1418 SDValue Ops[] = { 1419 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1420 }; 1421 1422 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1423 } 1424 } 1425 1426 if (!CmpSwap) { 1427 SDValue SRsrc, SOffset, Offset, SLC; 1428 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { 1429 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET : 1430 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET; 1431 1432 SDValue CmpVal = Mem->getOperand(2); 1433 SDValue Ops[] = { 1434 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1435 }; 1436 1437 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1438 } 1439 } 1440 1441 if (!CmpSwap) { 1442 SelectCode(N); 1443 return; 1444 } 1445 1446 MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1); 1447 *MMOs = Mem->getMemOperand(); 1448 CmpSwap->setMemRefs(MMOs, MMOs + 1); 1449 1450 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; 1451 SDValue Extract 1452 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); 1453 1454 ReplaceUses(SDValue(N, 0), Extract); 1455 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); 1456 CurDAG->RemoveDeadNode(N); 1457 } 1458 1459 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1460 SDValue &SrcMods) const { 1461 1462 unsigned Mods = 0; 1463 1464 Src = In; 1465 1466 if (Src.getOpcode() == ISD::FNEG) { 1467 Mods |= SISrcMods::NEG; 1468 Src = Src.getOperand(0); 1469 } 1470 1471 if (Src.getOpcode() == ISD::FABS) { 1472 Mods |= SISrcMods::ABS; 1473 Src = Src.getOperand(0); 1474 } 1475 1476 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1477 1478 return true; 1479 } 1480 1481 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1482 SDValue &SrcMods) const { 1483 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1484 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1485 } 1486 1487 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1488 SDValue &SrcMods, SDValue &Clamp, 1489 SDValue &Omod) const { 1490 SDLoc DL(In); 1491 // FIXME: Handle Clamp and Omod 1492 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1493 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1494 1495 return SelectVOP3Mods(In, Src, SrcMods); 1496 } 1497 1498 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1499 SDValue &SrcMods, SDValue &Clamp, 1500 SDValue &Omod) const { 1501 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1502 1503 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1504 cast<ConstantSDNode>(Clamp)->isNullValue() && 1505 cast<ConstantSDNode>(Omod)->isNullValue(); 1506 } 1507 1508 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1509 SDValue &SrcMods, 1510 SDValue &Omod) const { 1511 // FIXME: Handle Omod 1512 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1513 1514 return SelectVOP3Mods(In, Src, SrcMods); 1515 } 1516 1517 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1518 SDValue &SrcMods, 1519 SDValue &Clamp, 1520 SDValue &Omod) const { 1521 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1522 return SelectVOP3Mods(In, Src, SrcMods); 1523 } 1524 1525 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1526 const AMDGPUTargetLowering& Lowering = 1527 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1528 bool IsModified = false; 1529 do { 1530 IsModified = false; 1531 // Go over all selected nodes and try to fold them a bit more 1532 for (SDNode &Node : CurDAG->allnodes()) { 1533 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1534 if (!MachineNode) 1535 continue; 1536 1537 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1538 if (ResNode != &Node) { 1539 ReplaceUses(&Node, ResNode); 1540 IsModified = true; 1541 } 1542 } 1543 CurDAG->RemoveDeadNodes(); 1544 } while (IsModified); 1545 } 1546