//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned AS);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
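  // Complex patterns for the different addressing modes. Each selector fills
  // in its output operands and returns true when the address matches; most
  // also provide a trivial fallback decomposition so matching rarely fails.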
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp, SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
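/// \brief Returns true if \p N is an immediate that can be encoded directly
/// in an instruction word, as determined by SITargetLowering::analyzeImmediate,
/// i.e. no extra literal dword is required.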
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}
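// Local (LDS) memory operations on SI+ implicitly read the M0 register. Write
// the maximum value (-1) to M0 before each local access and glue the
// CopyToReg to the memory node so the two cannot be scheduled apart.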
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
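// Map a vector element count to the register class that holds the whole
// vector, one 32-bit register per element (e.g. 4 elements -> VReg_128 or
// SReg_128, depending on \p UseVGPR).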
static unsigned selectVectorRegClassID(unsigned NumVectorElts, bool UseVGPR) {
  if (UseVGPR) {
    switch (NumVectorElts) {
    case 1:
      return AMDGPU::VGPR_32RegClassID;
    case 2:
      return AMDGPU::VReg_64RegClassID;
    case 4:
      return AMDGPU::VReg_128RegClassID;
    case 8:
      return AMDGPU::VReg_256RegClassID;
    case 16:
      return AMDGPU::VReg_512RegClassID;
    }
  }

  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
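// Main entry point for instruction selection. Nodes that need custom handling
// are selected here; everything else falls through to the TableGen'erated
// matcher via SelectCode().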
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      // Use VGPRs unless some use of the vector requires an SGPR class.
      bool UseVReg = true;

      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }

      RegClassID = selectVectorRegClassID(NumVectorElts, UseVReg);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

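  // A 64-bit immediate that can't be encoded inline is split into two
  // S_MOV_B32s of the low and high halves, rejoined with a REG_SEQUENCE.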
  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
        CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
        SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
        SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }
  case AMDGPUISD::REGISTER_LOAD: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SDLoc DL(N);
    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
        Addr,
        Offset,
        CurDAG->getTargetConstant(0, DL, MVT::i32),
        N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    SDLoc DL(N);
    const SDValue Ops[] = {
        N->getOperand(1),
        Addr,
        Offset,
        CurDAG->getTargetConstant(0, DL, MVT::i32),
        N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  return SelectCode(N);
}
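// Pointer-classification helpers. These inspect the IR value attached to a
// node's MachineMemOperand to decide which address space the access targets.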
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue &BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}
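// Expand a 64-bit add/sub into 32-bit halves: S_ADD_U32 / S_SUB_U32 on the
// low dwords produce a carry (passed via glue) that S_ADDC_U32 / S_SUBB_U32
// consume for the high dwords; the halves are rejoined with a REG_SEQUENCE:
//
//   (add i64:x, i64:y) =>
//     (REG_SEQUENCE (S_ADD_U32 x.lo, y.lo), sub0,
//                   (S_ADDC_U32 x.hi, y.hi), sub1)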
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(AddLo, 0),
      Sub0,
      SDValue(AddHi, 0),
      Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}
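// DS instructions encode an unsigned 16-bit byte offset (the read2/write2
// forms encode two 8-bit offsets in units of the access size). Because the
// offset is added to the base address, folding is only safe on Southern
// Islands when the base is known non-negative; see below.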
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
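// Match a DS read2/write2 address: a base register plus two consecutive
// dword-sized offsets (offset0 and offset1 = offset0 + 1).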
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}
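// Decompose an address into the operand set shared by the MUBUF forms: a
// resource pointer (Ptr), an optional VGPR address (VAddr, addr64 mode), a
// scalar register offset (SOffset) and a 12-bit immediate offset, plus the
// offen/idxen/addr64 and glc/slc/tfe flag operands.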
void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
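// Private (scratch) accesses: build the scratch resource descriptor from the
// SCRATCH_RSRC_DWORD0/1 external symbols, take the per-wave offset from the
// preloaded SCRATCH_WAVE_OFFSET register, and fold a constant offset into the
// immediate field when it fits and the remaining base is known non-negative.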
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                ScratchOffsetReg, MVT::i32);
  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
  SDValue ScratchRsrcDword0 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);

  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
  SDValue ScratchRsrcDword1 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);

  const SDValue RsrcOps[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      ScratchRsrcDword0,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      ScratchRsrcDword1,
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
  };
  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                      MVT::v2i32, RsrcOps), 0);
  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
      MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
    if (CurDAG->SignBitIsZero(N0)) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
      if (isLegalMUBUFImmOffset(C1)) {
        VAddr = N0;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
      }
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
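// SMRD (scalar memory read) addressing: an SGPR base plus either an immediate
// or an SGPR byte offset. The legal immediate encoding differs by generation,
// as described below.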
///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset will
///        be in units of dwords and on VI+ it will be in units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
         "Cannot cast address space to / from constant address!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding
  // the address, so casting should just be reinterpreting the base VGPR;
  // insert a trunc / bitcast / zext as appropriate.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
        RC,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
        SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                       CurDAG->getConstant(0, DL, MVT::i32)), 0),
        CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  return SelectCode(N);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
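// VOP3 source-modifier matching: strip fneg / fabs from the source operand
// and record them as SISrcMods::NEG / SISrcMods::ABS bits in the SrcMods
// immediate, where they become free input modifiers on the VALU instruction.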
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}
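// Rewrite i64 loads and stores as v2i32 before selection; see the comments in
// the loop below for why this is not done during legalization instead.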
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. We could instead promote i64 loads to v2i32 during DAG
      // legalization, but some places in the DAG legalizer (e.g.
      // ExpandUnalignedLoad) assume i64 loads stay i64 when the type is
      // legal, so doing the promotion that early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
        ST->getChain(),
        NewValue,
        ST->getBasePtr(),
        ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}