//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Refreshed from the MachineFunction in runOnMachineFunction().
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  /// Main custom-selection hook; falls back to the TableGen-generated
  /// SelectCode() for anything not handled explicitly.
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  /// \returns true if \p N is an operand the SI target can encode as an
  /// inline immediate (delegates to SITargetLowering::analyzeImmediate).
  bool isInlineImmediate(SDNode *N) const;

  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory nodes.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  /// Glue a CopyToReg of the all-ones value into M0 onto local-memory
  /// accesses on SI+ targets (see the definition below).
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing-mode selectors.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing-mode selectors.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;

  // SMRD (scalar memory read) addressing-mode selectors.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);

  // VOP3 source-modifier (neg/abs, clamp, omod) selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before delegating to the base
  // implementation, which drives the actual selection.
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

/// \returns true if SITargetLowering classifies \p N as encodable as an
/// inline immediate operand (analyzeImmediate() == 0).
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: look up the operand's class in the
    // MCInstrDesc, skipping past the defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operand 0 is the super-register class ID; each value
    // operand is followed by its subregister index, so derive the operand's
    // class from the super class restricted to that subreg.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

/// Split \p Addr into a base (R1) and offset (R2) pair: frame indices become
/// a target frame index with zero offset, an ADD is split into its two
/// operands, and anything else is used as the base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectADDRParam(
  SDValue Addr, SDValue& R1, SDValue& R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

/// Like SelectADDRParam, but rejects external symbols and global addresses.
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


/// 64-bit variant of SelectADDR: same base/offset split with i64 constants.
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

/// For memory nodes accessing the LOCAL address space on SI+ targets, glue a
/// copy of the all-ones value into M0 onto \p N's operand list (by morphing
/// the node in place). Other nodes are returned unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Re-create N's operand list with the glue value appended, so the M0 write
  // is ordered before the memory access.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

/// Map a vector element count (1/2/4/8/16 x 32-bit) to the matching SGPR
/// register class ID.
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomic ops on local memory need M0 initialized (see glueCopyToM0).
  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Lower vector construction to a REG_SEQUENCE of 32-bit elements.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      // Single-element vector: a plain register-class copy suffices.
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Pair two 32-bit (or 64-bit) values into one register via REG_SEQUENCE.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit non-inline constants on SI+ as two S_MOV_B32s
    // combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores need M0 set up; selection itself is left to
    // the generated matcher below.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // Try to turn 32-bit mask/shift combinations into S_BFE.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  return SelectCode(N);
}

/// \returns true if \p Ptr is non-null and its pointee type lives in address
/// space \p AS. Null pointers (no IR value attached) return false.
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

/// \returns true if \p Op is a pseudo-source value or an IR pointer in the
/// PRIVATE address space.
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

// A store is "private" if it is not local, global, or region.
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

/// \p CbId of -1 matches the generic constant address space; otherwise the
/// specific constant buffer CONSTANT_BUFFER_0 + CbId is matched.
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  // Constant-address loads are also treated as global when the target is
  // pre-SI or the access is narrower than 32 bits.
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

/// \returns true if \p N is a constant-pool load in the private address space.
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  // Private is the fallback: anything not in a known address space.
  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

/// Match a constant address and return it as a dword-scaled pointer constant.
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

/// Match any non-constant address as (base, 0).
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

/// Split \p Addr into base + 16-bit signed immediate offset for R600 vertex
/// fetch; always succeeds, defaulting to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

/// Split \p Addr into base + immediate for indirect addressing; a bare
/// constant uses INDIRECT_BASE_ADDR as the base. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// Lower a 64-bit scalar add/sub as a 32-bit lo half (S_ADD_U32/S_SUB_U32)
/// glued to a carry-consuming hi half (S_ADDC_U32/S_SUBB_U32), recombined
/// with a REG_SEQUENCE.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // The low op produces (result, glue); the glue carries SCC to the high op.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  // Note the operand layout: each SelectVOP3Mods* call writes (src, mods)
  // with the modifiers operand preceding the source in Ops.
  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

/// \returns true if \p Offset fits the unsigned field of \p OffsetBits bits
/// and, on SI (unless unsafe folding is enabled), the base is provably
/// non-negative.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// Match a DS address as base + 16-bit unsigned byte offset; always succeeds,
/// defaulting to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
/// Match a DS address for 64-bit 4-byte-aligned access (read2/write2) as a
/// base plus two consecutive 8-bit dword-scaled offsets; always succeeds,
/// defaulting to (Addr, 0, 1).
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Constant address: use a materialized zero as the base and fold the
    // whole address into the two offsets.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

/// MUBUF immediate offsets are 12-bit unsigned.
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

/// Decompose \p Addr into the MUBUF operand set (pointer, vaddr, soffset,
/// immediate offset and the offen/idxen/addr64 and glc/slc/tfe flags).
/// Fails only when the subtarget prefers FLAT instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default all flags to 0; the cases below override addr64 when needed.
  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

/// Select the MUBUF addr64 addressing mode; succeeds only when SelectMUBUF
/// chose addr64 and the pointer could be wrapped into a 64-bit resource
/// descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

/// Convenience overload without glc/tfe outputs; slc is forced to 0 and the
/// glc/tfe results of the full selector are discarded.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
1049 if (CurDAG->SignBitIsZero(N0)) { 1050 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1051 if (isLegalMUBUFImmOffset(C1)) { 1052 VAddr = N0; 1053 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1054 return true; 1055 } 1056 } 1057 } 1058 1059 // (node) 1060 VAddr = Addr; 1061 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1062 return true; 1063 } 1064 1065 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1066 SDValue &SOffset, SDValue &Offset, 1067 SDValue &GLC, SDValue &SLC, 1068 SDValue &TFE) const { 1069 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1070 const SIInstrInfo *TII = 1071 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1072 1073 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1074 GLC, SLC, TFE)) 1075 return false; 1076 1077 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1078 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1079 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1080 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1081 APInt::getAllOnesValue(32).getZExtValue(); // Size 1082 SDLoc DL(Addr); 1083 1084 const SITargetLowering& Lowering = 1085 *static_cast<const SITargetLowering*>(getTargetLowering()); 1086 1087 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1088 return true; 1089 } 1090 return false; 1091 } 1092 1093 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1094 SDValue &Soffset, SDValue &Offset, 1095 SDValue &GLC) const { 1096 SDValue SLC, TFE; 1097 1098 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1099 } 1100 1101 /// 1102 /// \param EncodedOffset This is the immediate value that will be encoded 1103 /// directly into the instruction. On SI/CI the \p EncodedOffset 1104 /// will be in units of dwords and on VI+ it will be units of bytes. 
1105 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1106 int64_t EncodedOffset) { 1107 return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1108 isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1109 } 1110 1111 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1112 SDValue &Offset, bool &Imm) const { 1113 1114 // FIXME: Handle non-constant offsets. 1115 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1116 if (!C) 1117 return false; 1118 1119 SDLoc SL(ByteOffsetNode); 1120 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1121 int64_t ByteOffset = C->getSExtValue(); 1122 int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1123 ByteOffset >> 2 : ByteOffset; 1124 1125 if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1126 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1127 Imm = true; 1128 return true; 1129 } 1130 1131 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1132 return false; 1133 1134 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1135 // 32-bit Immediates are supported on Sea Islands. 
1136 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1137 } else { 1138 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1139 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1140 C32Bit), 0); 1141 } 1142 Imm = false; 1143 return true; 1144 } 1145 1146 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1147 SDValue &Offset, bool &Imm) const { 1148 1149 SDLoc SL(Addr); 1150 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1151 SDValue N0 = Addr.getOperand(0); 1152 SDValue N1 = Addr.getOperand(1); 1153 1154 if (SelectSMRDOffset(N1, Offset, Imm)) { 1155 SBase = N0; 1156 return true; 1157 } 1158 } 1159 SBase = Addr; 1160 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1161 Imm = true; 1162 return true; 1163 } 1164 1165 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1166 SDValue &Offset) const { 1167 bool Imm; 1168 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1169 } 1170 1171 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1172 SDValue &Offset) const { 1173 1174 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1175 return false; 1176 1177 bool Imm; 1178 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1179 return false; 1180 1181 return !Imm && isa<ConstantSDNode>(Offset); 1182 } 1183 1184 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1185 SDValue &Offset) const { 1186 bool Imm; 1187 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1188 !isa<ConstantSDNode>(Offset); 1189 } 1190 1191 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1192 SDValue &Offset) const { 1193 bool Imm; 1194 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1195 } 1196 1197 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1198 SDValue &Offset) const { 1199 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1200 return false; 1201 1202 bool Imm; 1203 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1204 
return false; 1205 1206 return !Imm && isa<ConstantSDNode>(Offset); 1207 } 1208 1209 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1210 SDValue &Offset) const { 1211 bool Imm; 1212 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1213 !isa<ConstantSDNode>(Offset); 1214 } 1215 1216 // FIXME: This is incorrect and only enough to be able to compile. 1217 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 1218 AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); 1219 SDLoc DL(N); 1220 1221 const MachineFunction &MF = CurDAG->getMachineFunction(); 1222 DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(), 1223 "addrspacecast not implemented", DL); 1224 CurDAG->getContext()->diagnose(NotImplemented); 1225 1226 assert(Subtarget->hasFlatAddressSpace() && 1227 "addrspacecast only supported with flat address space!"); 1228 1229 assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || 1230 ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && 1231 "Can only cast to / from flat address space!"); 1232 1233 // The flat instructions read the address as the index of the VGPR holding the 1234 // address, so casting should just be reinterpreting the base VGPR, so just 1235 // insert trunc / bitcast / zext. 
1236 1237 SDValue Src = ASC->getOperand(0); 1238 EVT DestVT = ASC->getValueType(0); 1239 EVT SrcVT = Src.getValueType(); 1240 1241 unsigned SrcSize = SrcVT.getSizeInBits(); 1242 unsigned DestSize = DestVT.getSizeInBits(); 1243 1244 if (SrcSize > DestSize) { 1245 assert(SrcSize == 64 && DestSize == 32); 1246 return CurDAG->getMachineNode( 1247 TargetOpcode::EXTRACT_SUBREG, 1248 DL, 1249 DestVT, 1250 Src, 1251 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); 1252 } 1253 1254 if (DestSize > SrcSize) { 1255 assert(SrcSize == 32 && DestSize == 64); 1256 1257 // FIXME: This is probably wrong, we should never be defining 1258 // a register class with both VGPRs and SGPRs 1259 SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL, 1260 MVT::i32); 1261 1262 const SDValue Ops[] = { 1263 RC, 1264 Src, 1265 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1266 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1267 CurDAG->getConstant(0, DL, MVT::i32)), 0), 1268 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 1269 }; 1270 1271 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 1272 DL, N->getValueType(0), Ops); 1273 } 1274 1275 assert(SrcSize == 64 && DestSize == 64); 1276 return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode(); 1277 } 1278 1279 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 1280 uint32_t Offset, uint32_t Width) { 1281 // Transformation function, pack the offset and width of a BFE into 1282 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1283 // source, bits [5:0] contain the offset and bits [22:16] the width. 
1284 uint32_t PackedVal = Offset | (Width << 16); 1285 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1286 1287 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1288 } 1289 1290 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1291 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1292 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1293 // Predicate: 0 < b <= c < 32 1294 1295 const SDValue &Shl = N->getOperand(0); 1296 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1297 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1298 1299 if (B && C) { 1300 uint32_t BVal = B->getZExtValue(); 1301 uint32_t CVal = C->getZExtValue(); 1302 1303 if (0 < BVal && BVal <= CVal && CVal < 32) { 1304 bool Signed = N->getOpcode() == ISD::SRA; 1305 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1306 1307 return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), 1308 CVal - BVal, 32 - CVal); 1309 } 1310 } 1311 return SelectCode(N); 1312 } 1313 1314 SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1315 switch (N->getOpcode()) { 1316 case ISD::AND: 1317 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1318 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1319 // Predicate: isMask(mask) 1320 const SDValue &Srl = N->getOperand(0); 1321 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1322 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1323 1324 if (Shift && Mask) { 1325 uint32_t ShiftVal = Shift->getZExtValue(); 1326 uint32_t MaskVal = Mask->getZExtValue(); 1327 1328 if (isMask_32(MaskVal)) { 1329 uint32_t WidthVal = countPopulation(MaskVal); 1330 1331 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0), 1332 ShiftVal, WidthVal); 1333 } 1334 } 1335 } 1336 break; 1337 case ISD::SRL: 1338 if (N->getOperand(0).getOpcode() == ISD::AND) { 1339 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1340 // 
Predicate: isMask(mask >> b) 1341 const SDValue &And = N->getOperand(0); 1342 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1343 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1344 1345 if (Shift && Mask) { 1346 uint32_t ShiftVal = Shift->getZExtValue(); 1347 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1348 1349 if (isMask_32(MaskVal)) { 1350 uint32_t WidthVal = countPopulation(MaskVal); 1351 1352 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0), 1353 ShiftVal, WidthVal); 1354 } 1355 } 1356 } else if (N->getOperand(0).getOpcode() == ISD::SHL) 1357 return SelectS_BFEFromShifts(N); 1358 break; 1359 case ISD::SRA: 1360 if (N->getOperand(0).getOpcode() == ISD::SHL) 1361 return SelectS_BFEFromShifts(N); 1362 break; 1363 } 1364 1365 return SelectCode(N); 1366 } 1367 1368 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1369 SDValue &SrcMods) const { 1370 1371 unsigned Mods = 0; 1372 1373 Src = In; 1374 1375 if (Src.getOpcode() == ISD::FNEG) { 1376 Mods |= SISrcMods::NEG; 1377 Src = Src.getOperand(0); 1378 } 1379 1380 if (Src.getOpcode() == ISD::FABS) { 1381 Mods |= SISrcMods::ABS; 1382 Src = Src.getOperand(0); 1383 } 1384 1385 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1386 1387 return true; 1388 } 1389 1390 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1391 SDValue &SrcMods) const { 1392 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1393 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1394 } 1395 1396 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1397 SDValue &SrcMods, SDValue &Clamp, 1398 SDValue &Omod) const { 1399 SDLoc DL(In); 1400 // FIXME: Handle Clamp and Omod 1401 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1402 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1403 1404 return SelectVOP3Mods(In, Src, SrcMods); 1405 } 1406 1407 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 
1408 SDValue &SrcMods, SDValue &Clamp, 1409 SDValue &Omod) const { 1410 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1411 1412 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1413 cast<ConstantSDNode>(Clamp)->isNullValue() && 1414 cast<ConstantSDNode>(Omod)->isNullValue(); 1415 } 1416 1417 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1418 SDValue &SrcMods, 1419 SDValue &Omod) const { 1420 // FIXME: Handle Omod 1421 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1422 1423 return SelectVOP3Mods(In, Src, SrcMods); 1424 } 1425 1426 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1427 SDValue &SrcMods, 1428 SDValue &Clamp, 1429 SDValue &Omod) const { 1430 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1431 return SelectVOP3Mods(In, Src, SrcMods); 1432 } 1433 1434 void AMDGPUDAGToDAGISel::PreprocessISelDAG() { 1435 bool Modified = false; 1436 1437 // XXX - Other targets seem to be able to do this without a worklist. 1438 SmallVector<LoadSDNode *, 8> LoadsToReplace; 1439 SmallVector<StoreSDNode *, 8> StoresToReplace; 1440 1441 for (SDNode &Node : CurDAG->allnodes()) { 1442 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) { 1443 EVT VT = LD->getValueType(0); 1444 if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) 1445 continue; 1446 1447 // To simplify the TableGen patters, we replace all i64 loads with v2i32 1448 // loads. Alternatively, we could promote i64 loads to v2i32 during DAG 1449 // legalization, however, so places (ExpandUnalignedLoad) in the DAG 1450 // legalizer assume that if i64 is legal, so doing this promotion early 1451 // can cause problems. 1452 LoadsToReplace.push_back(LD); 1453 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) { 1454 // Handle i64 stores here for the same reason mentioned above for loads. 
1455 SDValue Value = ST->getValue(); 1456 if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore()) 1457 continue; 1458 StoresToReplace.push_back(ST); 1459 } 1460 } 1461 1462 for (LoadSDNode *LD : LoadsToReplace) { 1463 SDLoc SL(LD); 1464 1465 SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(), 1466 LD->getBasePtr(), LD->getMemOperand()); 1467 SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, 1468 MVT::i64, NewLoad); 1469 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); 1470 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast); 1471 Modified = true; 1472 } 1473 1474 for (StoreSDNode *ST : StoresToReplace) { 1475 SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST), 1476 MVT::v2i32, ST->getValue()); 1477 const SDValue StoreOps[] = { 1478 ST->getChain(), 1479 NewValue, 1480 ST->getBasePtr(), 1481 ST->getOffset() 1482 }; 1483 1484 CurDAG->UpdateNodeOperands(ST, StoreOps); 1485 Modified = true; 1486 } 1487 1488 // XXX - Is this necessary? 1489 if (Modified) 1490 CurDAG->RemoveDeadNodes(); 1491 } 1492 1493 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1494 const AMDGPUTargetLowering& Lowering = 1495 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1496 bool IsModified = false; 1497 do { 1498 IsModified = false; 1499 // Go over all selected nodes and try to fold them a bit more 1500 for (SDNode &Node : CurDAG->allnodes()) { 1501 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1502 if (!MachineNode) 1503 continue; 1504 1505 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1506 if (ResNode != &Node) { 1507 ReplaceUses(&Node, ResNode); 1508 IsModified = true; 1509 } 1510 } 1511 CurDAG->RemoveDeadNodes(); 1512 } while (IsModified); 1513 } 1514