1 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "AMDGPUInstrInfo.h" 15 #include "AMDGPUISelLowering.h" // For AMDGPUISD 16 #include "AMDGPURegisterInfo.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600InstrInfo.h" 19 #include "SIDefines.h" 20 #include "SIISelLowering.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/FunctionLoweringInfo.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/PseudoSourceValue.h" 26 #include "llvm/CodeGen/SelectionDAG.h" 27 #include "llvm/CodeGen/SelectionDAGISel.h" 28 #include "llvm/IR/Function.h" 29 30 using namespace llvm; 31 32 //===----------------------------------------------------------------------===// 33 // Instruction Selector Implementation 34 //===----------------------------------------------------------------------===// 35 36 namespace { 37 /// AMDGPU specific code to select AMDGPU machine instructions for 38 /// SelectionDAG operations. 39 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 40 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 41 // make the right decision when generating code for different targets. 42 const AMDGPUSubtarget *Subtarget; 43 44 public: 45 AMDGPUDAGToDAGISel(TargetMachine &TM); 46 virtual ~AMDGPUDAGToDAGISel(); 47 bool runOnMachineFunction(MachineFunction &MF) override; 48 SDNode *Select(SDNode *N) override; 49 const char *getPassName() const override; 50 void PreprocessISelDAG() override; 51 void PostprocessISelDAG() override; 52 53 private: 54 bool isInlineImmediate(SDNode *N) const; 55 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 56 const R600InstrInfo *TII); 57 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 58 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 59 60 // Complex pattern selectors 61 bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); 62 bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); 63 bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); 64 65 static bool checkType(const Value *ptr, unsigned int addrspace); 66 static bool checkPrivateAddress(const MachineMemOperand *Op); 67 68 static bool isGlobalStore(const StoreSDNode *N); 69 static bool isFlatStore(const StoreSDNode *N); 70 static bool isPrivateStore(const StoreSDNode *N); 71 static bool isLocalStore(const StoreSDNode *N); 72 static bool isRegionStore(const StoreSDNode *N); 73 74 bool isCPLoad(const LoadSDNode *N) const; 75 bool isConstantLoad(const LoadSDNode *N, int cbID) const; 76 bool isGlobalLoad(const LoadSDNode *N) const; 77 bool isFlatLoad(const LoadSDNode *N) const; 78 bool isParamLoad(const LoadSDNode *N) const; 79 bool isPrivateLoad(const LoadSDNode *N) const; 80 bool isLocalLoad(const LoadSDNode *N) const; 81 bool isRegionLoad(const LoadSDNode *N) const; 82 83 SDNode *glueCopyToM0(SDNode *N) const; 84 85 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 86 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 87 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 88 SDValue& Offset); 89 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 90 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 91 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 92 unsigned OffsetBits) const; 93 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 94 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 95 SDValue &Offset1) const; 96 void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 97 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 98 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 99 SDValue &TFE) const; 100 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 101 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 102 SDValue &SLC, SDValue &TFE) const; 103 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 104 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 105 SDValue &SLC) const; 106 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 107 SDValue &SOffset, SDValue &ImmOffset) const; 108 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 109 SDValue &Offset, SDValue &GLC, SDValue &SLC, 110 SDValue &TFE) const; 111 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 112 SDValue &Offset, SDValue &GLC) const; 113 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 114 bool &Imm) const; 115 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 116 bool &Imm) const; 117 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 118 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 119 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 120 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 121 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 122 bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 123 SDNode *SelectAddrSpaceCast(SDNode *N); 124 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 125 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 126 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 127 SDValue &Clamp, SDValue &Omod) const; 128 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 129 SDValue &Clamp, SDValue &Omod) const; 130 131 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 132 SDValue &Omod) const; 133 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 134 SDValue &Clamp, 135 SDValue &Omod) const; 136 137 SDNode *SelectADD_SUB_I64(SDNode *N); 138 SDNode *SelectDIV_SCALE(SDNode *N); 139 140 SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 141 uint32_t Offset, uint32_t Width); 142 SDNode *SelectS_BFEFromShifts(SDNode *N); 143 SDNode *SelectS_BFE(SDNode *N); 144 145 // Include the pieces autogenerated from the target description. 146 #include "AMDGPUGenDAGISel.inc" 147 }; 148 } // end anonymous namespace 149 150 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 151 // DAG, ready for instruction scheduling. 152 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) { 153 return new AMDGPUDAGToDAGISel(TM); 154 } 155 156 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) 157 : SelectionDAGISel(TM) {} 158 159 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 160 Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget()); 161 return SelectionDAGISel::runOnMachineFunction(MF); 162 } 163 164 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 165 } 166 167 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { 168 const SITargetLowering *TL 169 = static_cast<const SITargetLowering *>(getTargetLowering()); 170 return TL->analyzeImmediate(N) == 0; 171 } 172 173 /// \brief Determine the register class for \p OpNo 174 /// \returns The register class of the virtual register that will be used for 175 /// the given operand number \OpNo or NULL if the register class cannot be 176 /// determined. 177 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 178 unsigned OpNo) const { 179 if (!N->isMachineOpcode()) 180 return nullptr; 181 182 switch (N->getMachineOpcode()) { 183 default: { 184 const MCInstrDesc &Desc = 185 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 186 unsigned OpIdx = Desc.getNumDefs() + OpNo; 187 if (OpIdx >= Desc.getNumOperands()) 188 return nullptr; 189 int RegClass = Desc.OpInfo[OpIdx].RegClass; 190 if (RegClass == -1) 191 return nullptr; 192 193 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 194 } 195 case AMDGPU::REG_SEQUENCE: { 196 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 197 const TargetRegisterClass *SuperRC = 198 Subtarget->getRegisterInfo()->getRegClass(RCID); 199 200 SDValue SubRegOp = N->getOperand(OpNo + 1); 201 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 202 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 203 SubRegIdx); 204 } 205 } 206 } 207 208 bool AMDGPUDAGToDAGISel::SelectADDRParam( 209 SDValue Addr, SDValue& R1, SDValue& R2) { 210 211 if (Addr.getOpcode() == ISD::FrameIndex) { 212 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 213 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); 214 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 215 } else { 216 R1 = Addr; 217 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 218 } 219 } else if (Addr.getOpcode() == ISD::ADD) { 220 R1 = Addr.getOperand(0); 221 R2 = Addr.getOperand(1); 222 } else { 223 R1 = Addr; 224 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 225 } 226 return true; 227 } 228 229 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { 230 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 231 Addr.getOpcode() == ISD::TargetGlobalAddress) { 232 return false; 233 } 234 return SelectADDRParam(Addr, R1, R2); 235 } 236 237 238 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { 239 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 240 Addr.getOpcode() == ISD::TargetGlobalAddress) { 241 return false; 242 } 243 244 if (Addr.getOpcode() == ISD::FrameIndex) { 245 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 246 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); 247 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 248 } else { 249 R1 = Addr; 250 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 251 } 252 } else if (Addr.getOpcode() == ISD::ADD) { 253 R1 = Addr.getOperand(0); 254 R2 = Addr.getOperand(1); 255 } else { 256 R1 = Addr; 257 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 258 } 259 return true; 260 } 261 262 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 263 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 264 !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(), 265 AMDGPUAS::LOCAL_ADDRESS)) 266 return N; 267 268 const SITargetLowering& Lowering = 269 *static_cast<const SITargetLowering*>(getTargetLowering()); 270 271 // Write max value to m0 before each load operation 272 273 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 274 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 275 276 SDValue Glue = M0.getValue(1); 277 278 SmallVector <SDValue, 8> Ops; 279 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 280 Ops.push_back(N->getOperand(i)); 281 } 282 Ops.push_back(Glue); 283 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 284 285 return N; 286 } 287 288 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 289 switch (NumVectorElts) { 290 case 1: 291 return AMDGPU::SReg_32RegClassID; 292 case 2: 293 return AMDGPU::SReg_64RegClassID; 294 case 4: 295 return AMDGPU::SReg_128RegClassID; 296 case 8: 297 return AMDGPU::SReg_256RegClassID; 298 case 16: 299 return AMDGPU::SReg_512RegClassID; 300 } 301 302 llvm_unreachable("invalid vector size"); 303 } 304 305 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { 306 unsigned int Opc = N->getOpcode(); 307 if (N->isMachineOpcode()) { 308 N->setNodeId(-1); 309 return nullptr; // Already selected. 310 } 311 312 if (isa<AtomicSDNode>(N)) 313 N = glueCopyToM0(N); 314 315 switch (Opc) { 316 default: break; 317 // We are selecting i64 ADD here instead of custom lower it during 318 // DAG legalization, so we can fold some i64 ADDs used for address 319 // calculation into the LOAD and STORE instructions. 320 case ISD::ADD: 321 case ISD::SUB: { 322 if (N->getValueType(0) != MVT::i64 || 323 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 324 break; 325 326 return SelectADD_SUB_I64(N); 327 } 328 case ISD::SCALAR_TO_VECTOR: 329 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 330 case ISD::BUILD_VECTOR: { 331 unsigned RegClassID; 332 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 333 EVT VT = N->getValueType(0); 334 unsigned NumVectorElts = VT.getVectorNumElements(); 335 EVT EltVT = VT.getVectorElementType(); 336 assert(EltVT.bitsEq(MVT::i32)); 337 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 338 RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 339 } else { 340 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 341 // that adds a 128 bits reg copy when going through TwoAddressInstructions 342 // pass. We want to avoid 128 bits copies as much as possible because they 343 // can't be bundled by our scheduler. 344 switch(NumVectorElts) { 345 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 346 case 4: 347 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 348 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 349 else 350 RegClassID = AMDGPU::R600_Reg128RegClassID; 351 break; 352 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 353 } 354 } 355 356 SDLoc DL(N); 357 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 358 359 if (NumVectorElts == 1) { 360 return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, 361 N->getOperand(0), RegClass); 362 } 363 364 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 365 "supported yet"); 366 // 16 = Max Num Vector Elements 367 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 368 // 1 = Vector Register Class 369 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 370 371 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 372 bool IsRegSeq = true; 373 unsigned NOps = N->getNumOperands(); 374 for (unsigned i = 0; i < NOps; i++) { 375 // XXX: Why is this here? 376 if (isa<RegisterSDNode>(N->getOperand(i))) { 377 IsRegSeq = false; 378 break; 379 } 380 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 381 RegSeqArgs[1 + (2 * i) + 1] = 382 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 383 MVT::i32); 384 } 385 386 if (NOps != NumVectorElts) { 387 // Fill in the missing undef elements if this was a scalar_to_vector. 388 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 389 390 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 391 DL, EltVT); 392 for (unsigned i = NOps; i < NumVectorElts; ++i) { 393 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 394 RegSeqArgs[1 + (2 * i) + 1] = 395 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 396 } 397 } 398 399 if (!IsRegSeq) 400 break; 401 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), 402 RegSeqArgs); 403 } 404 case ISD::BUILD_PAIR: { 405 SDValue RC, SubReg0, SubReg1; 406 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 407 break; 408 } 409 SDLoc DL(N); 410 if (N->getValueType(0) == MVT::i128) { 411 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 412 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 413 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 414 } else if (N->getValueType(0) == MVT::i64) { 415 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 416 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 417 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 418 } else { 419 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 420 } 421 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 422 N->getOperand(1), SubReg1 }; 423 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 424 DL, N->getValueType(0), Ops); 425 } 426 427 case ISD::Constant: 428 case ISD::ConstantFP: { 429 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 430 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 431 break; 432 433 uint64_t Imm; 434 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 435 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 436 else { 437 ConstantSDNode *C = cast<ConstantSDNode>(N); 438 Imm = C->getZExtValue(); 439 } 440 441 SDLoc DL(N); 442 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 443 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 444 MVT::i32)); 445 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 446 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 447 const SDValue Ops[] = { 448 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 449 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 450 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 451 }; 452 453 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 454 N->getValueType(0), Ops); 455 } 456 case ISD::LOAD: 457 case ISD::STORE: { 458 N = glueCopyToM0(N); 459 break; 460 } 461 case AMDGPUISD::REGISTER_LOAD: { 462 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 463 break; 464 SDValue Addr, Offset; 465 466 SDLoc DL(N); 467 SelectADDRIndirect(N->getOperand(1), Addr, Offset); 468 const SDValue Ops[] = { 469 Addr, 470 Offset, 471 CurDAG->getTargetConstant(0, DL, MVT::i32), 472 N->getOperand(0), 473 }; 474 return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL, 475 CurDAG->getVTList(MVT::i32, MVT::i64, 476 MVT::Other), 477 Ops); 478 } 479 case AMDGPUISD::REGISTER_STORE: { 480 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 481 break; 482 SDValue Addr, Offset; 483 SelectADDRIndirect(N->getOperand(2), Addr, Offset); 484 SDLoc DL(N); 485 const SDValue Ops[] = { 486 N->getOperand(1), 487 Addr, 488 Offset, 489 CurDAG->getTargetConstant(0, DL, MVT::i32), 490 N->getOperand(0), 491 }; 492 return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL, 493 CurDAG->getVTList(MVT::Other), 494 Ops); 495 } 496 497 case AMDGPUISD::BFE_I32: 498 case AMDGPUISD::BFE_U32: { 499 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 500 break; 501 502 // There is a scalar version available, but unlike the vector version which 503 // has a separate operand for the offset and width, the scalar version packs 504 // the width and offset into a single operand. Try to move to the scalar 505 // version if the offsets are constant, so that we can try to keep extended 506 // loads of kernel arguments in SGPRs. 507 508 // TODO: Technically we could try to pattern match scalar bitshifts of 509 // dynamic values, but it's probably not useful. 510 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 511 if (!Offset) 512 break; 513 514 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 515 if (!Width) 516 break; 517 518 bool Signed = Opc == AMDGPUISD::BFE_I32; 519 520 uint32_t OffsetVal = Offset->getZExtValue(); 521 uint32_t WidthVal = Width->getZExtValue(); 522 523 return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), 524 N->getOperand(0), OffsetVal, WidthVal); 525 } 526 case AMDGPUISD::DIV_SCALE: { 527 return SelectDIV_SCALE(N); 528 } 529 case ISD::CopyToReg: { 530 const SITargetLowering& Lowering = 531 *static_cast<const SITargetLowering*>(getTargetLowering()); 532 Lowering.legalizeTargetIndependentNode(N, *CurDAG); 533 break; 534 } 535 case ISD::ADDRSPACECAST: 536 return SelectAddrSpaceCast(N); 537 case ISD::AND: 538 case ISD::SRL: 539 case ISD::SRA: 540 if (N->getValueType(0) != MVT::i32 || 541 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 542 break; 543 544 return SelectS_BFE(N); 545 } 546 547 return SelectCode(N); 548 } 549 550 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { 551 assert(AS != 0 && "Use checkPrivateAddress instead."); 552 if (!Ptr) 553 return false; 554 555 return Ptr->getType()->getPointerAddressSpace() == AS; 556 } 557 558 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) { 559 if (Op->getPseudoValue()) 560 return true; 561 562 if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType())) 563 return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; 564 565 return false; 566 } 567 568 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { 569 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 570 } 571 572 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { 573 const Value *MemVal = N->getMemOperand()->getValue(); 574 return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 575 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 576 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS)); 577 } 578 579 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { 580 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 581 } 582 583 bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) { 584 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 585 } 586 587 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { 588 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 589 } 590 591 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const { 592 const Value *MemVal = N->getMemOperand()->getValue(); 593 if (CbId == -1) 594 return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS); 595 596 return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId); 597 } 598 599 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { 600 if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) 601 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 602 N->getMemoryVT().bitsLT(MVT::i32)) 603 return true; 604 605 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 606 } 607 608 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { 609 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS); 610 } 611 612 bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { 613 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 614 } 615 616 bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const { 617 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 618 } 619 620 bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { 621 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 622 } 623 624 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { 625 MachineMemOperand *MMO = N->getMemOperand(); 626 if (checkPrivateAddress(N->getMemOperand())) { 627 if (MMO) { 628 const PseudoSourceValue *PSV = MMO->getPseudoValue(); 629 if (PSV && PSV->isConstantPool()) { 630 return true; 631 } 632 } 633 } 634 return false; 635 } 636 637 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { 638 if (checkPrivateAddress(N->getMemOperand())) { 639 // Check to make sure we are not a constant pool load or a constant load 640 // that is marked as a private load 641 if (isCPLoad(N) || isConstantLoad(N, -1)) { 642 return false; 643 } 644 } 645 646 const Value *MemVal = N->getMemOperand()->getValue(); 647 if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 648 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 649 !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) && 650 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) && 651 !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) && 652 !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) && 653 !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) { 654 return true; 655 } 656 return false; 657 } 658 659 const char *AMDGPUDAGToDAGISel::getPassName() const { 660 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 661 } 662 663 #ifdef DEBUGTMP 664 #undef INT64_C 665 #endif 666 #undef DEBUGTMP 667 668 //===----------------------------------------------------------------------===// 669 // Complex Patterns 670 //===----------------------------------------------------------------------===// 671 672 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 673 SDValue& IntPtr) { 674 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 675 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 676 true); 677 return true; 678 } 679 return false; 680 } 681 682 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 683 SDValue& BaseReg, SDValue &Offset) { 684 if (!isa<ConstantSDNode>(Addr)) { 685 BaseReg = Addr; 686 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 687 return true; 688 } 689 return false; 690 } 691 692 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 693 SDValue &Offset) { 694 ConstantSDNode *IMMOffset; 695 696 if (Addr.getOpcode() == ISD::ADD 697 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 698 && isInt<16>(IMMOffset->getZExtValue())) { 699 700 Base = Addr.getOperand(0); 701 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 702 MVT::i32); 703 return true; 704 // If the pointer address is constant, we can move it to the offset field. 705 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 706 && isInt<16>(IMMOffset->getZExtValue())) { 707 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 708 SDLoc(CurDAG->getEntryNode()), 709 AMDGPU::ZERO, MVT::i32); 710 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 711 MVT::i32); 712 return true; 713 } 714 715 // Default case, no offset 716 Base = Addr; 717 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 718 return true; 719 } 720 721 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 722 SDValue &Offset) { 723 ConstantSDNode *C; 724 SDLoc DL(Addr); 725 726 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 727 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 728 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 729 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 730 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 731 Base = Addr.getOperand(0); 732 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 733 } else { 734 Base = Addr; 735 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 736 } 737 738 return true; 739 } 740 741 SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 742 SDLoc DL(N); 743 SDValue LHS = N->getOperand(0); 744 SDValue RHS = N->getOperand(1); 745 746 bool IsAdd = (N->getOpcode() == ISD::ADD); 747 748 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 749 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 750 751 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 752 DL, MVT::i32, LHS, Sub0); 753 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 754 DL, MVT::i32, LHS, Sub1); 755 756 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 757 DL, MVT::i32, RHS, Sub0); 758 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 759 DL, MVT::i32, RHS, Sub1); 760 761 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 762 SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 763 764 765 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 766 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 767 768 SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); 769 SDValue Carry(AddLo, 1); 770 SDNode *AddHi 771 = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, 772 SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); 773 774 SDValue Args[5] = { 775 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 776 SDValue(AddLo,0), 777 Sub0, 778 SDValue(AddHi,0), 779 Sub1, 780 }; 781 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); 782 } 783 784 // We need to handle this here because tablegen doesn't support matching 785 // instructions with multiple outputs. 786 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 787 SDLoc SL(N); 788 EVT VT = N->getValueType(0); 789 790 assert(VT == MVT::f32 || VT == MVT::f64); 791 792 unsigned Opc 793 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 794 795 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, 796 // omod 797 SDValue Ops[8]; 798 799 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 800 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 801 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 802 return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 803 } 804 805 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 806 unsigned OffsetBits) const { 807 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 808 (OffsetBits == 8 && !isUInt<8>(Offset))) 809 return false; 810 811 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 812 Subtarget->unsafeDSOffsetFoldingEnabled()) 813 return true; 814 815 // On Southern Islands instruction with a negative base value and an offset 816 // don't seem to work. 817 return CurDAG->SignBitIsZero(Base); 818 } 819 820 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 821 SDValue &Offset) const { 822 if (CurDAG->isBaseWithConstantOffset(Addr)) { 823 SDValue N0 = Addr.getOperand(0); 824 SDValue N1 = Addr.getOperand(1); 825 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 826 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 827 // (add n0, c0) 828 Base = N0; 829 Offset = N1; 830 return true; 831 } 832 } else if (Addr.getOpcode() == ISD::SUB) { 833 // sub C, x -> add (sub 0, x), C 834 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 835 int64_t ByteOffset = C->getSExtValue(); 836 if (isUInt<16>(ByteOffset)) { 837 SDLoc DL(Addr); 838 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 839 840 // XXX - This is kind of hacky. Create a dummy sub node so we can check 841 // the known bits in isDSOffsetLegal. We need to emit the selected node 842 // here, so this is thrown away. 843 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 844 Zero, Addr.getOperand(1)); 845 846 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 847 MachineSDNode *MachineSub 848 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 849 Zero, Addr.getOperand(1)); 850 851 Base = SDValue(MachineSub, 0); 852 Offset = Addr.getOperand(0); 853 return true; 854 } 855 } 856 } 857 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 858 // If we have a constant address, prefer to put the constant into the 859 // offset. This can save moves to load the constant address since multiple 860 // operations can share the zero base address register, and enables merging 861 // into read2 / write2 instructions. 862 863 SDLoc DL(Addr); 864 865 if (isUInt<16>(CAddr->getZExtValue())) { 866 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 867 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 868 DL, MVT::i32, Zero); 869 Base = SDValue(MovZero, 0); 870 Offset = Addr; 871 return true; 872 } 873 } 874 875 // default case 876 Base = Addr; 877 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 878 return true; 879 } 880 881 // TODO: If offset is too big, put low 16-bit into offset. 882 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 883 SDValue &Offset0, 884 SDValue &Offset1) const { 885 SDLoc DL(Addr); 886 887 if (CurDAG->isBaseWithConstantOffset(Addr)) { 888 SDValue N0 = Addr.getOperand(0); 889 SDValue N1 = Addr.getOperand(1); 890 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 891 unsigned DWordOffset0 = C1->getZExtValue() / 4; 892 unsigned DWordOffset1 = DWordOffset0 + 1; 893 // (add n0, c0) 894 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 895 Base = N0; 896 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 897 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 898 return true; 899 } 900 } else if (Addr.getOpcode() == ISD::SUB) { 901 // sub C, x -> add (sub 0, x), C 902 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 903 unsigned DWordOffset0 = C->getZExtValue() / 4; 904 unsigned DWordOffset1 = DWordOffset0 + 1; 905 906 if (isUInt<8>(DWordOffset0)) { 907 SDLoc DL(Addr); 908 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 909 910 // XXX - This is kind of hacky. Create a dummy sub node so we can check 911 // the known bits in isDSOffsetLegal. We need to emit the selected node 912 // here, so this is thrown away. 913 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 914 Zero, Addr.getOperand(1)); 915 916 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 917 MachineSDNode *MachineSub 918 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 919 Zero, Addr.getOperand(1)); 920 921 Base = SDValue(MachineSub, 0); 922 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 923 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 924 return true; 925 } 926 } 927 } 928 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 929 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 930 unsigned DWordOffset1 = DWordOffset0 + 1; 931 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 932 933 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 934 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 935 MachineSDNode *MovZero 936 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 937 DL, MVT::i32, Zero); 938 Base = SDValue(MovZero, 0); 939 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 940 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 941 return true; 942 } 943 } 944 945 // default case 946 Base = Addr; 947 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 948 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 949 return true; 950 } 951 952 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 953 return isUInt<12>(Imm->getZExtValue()); 954 } 955 956 void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 957 SDValue &VAddr, SDValue &SOffset, 958 SDValue &Offset, SDValue &Offen, 959 SDValue &Idxen, SDValue &Addr64, 960 SDValue &GLC, SDValue &SLC, 961 SDValue &TFE) const { 962 SDLoc DL(Addr); 963 964 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 965 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 966 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 967 968 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 969 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 970 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 971 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 972 973 if (CurDAG->isBaseWithConstantOffset(Addr)) { 974 SDValue N0 = Addr.getOperand(0); 975 SDValue N1 = Addr.getOperand(1); 976 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 977 978 if (N0.getOpcode() == ISD::ADD) { 979 // (add (add N2, N3), C1) -> addr64 980 SDValue N2 = N0.getOperand(0); 981 SDValue N3 = N0.getOperand(1); 982 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 983 Ptr = N2; 984 VAddr = N3; 985 } else { 986 987 // (add N0, C1) -> offset 988 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 989 Ptr = N0; 990 } 991 992 if (isLegalMUBUFImmOffset(C1)) { 993 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 994 return; 995 } else if (isUInt<32>(C1->getZExtValue())) { 996 // Illegal offset, store it in soffset. 997 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 998 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 999 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 1000 0); 1001 return; 1002 } 1003 } 1004 1005 if (Addr.getOpcode() == ISD::ADD) { 1006 // (add N0, N1) -> addr64 1007 SDValue N0 = Addr.getOperand(0); 1008 SDValue N1 = Addr.getOperand(1); 1009 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1010 Ptr = N0; 1011 VAddr = N1; 1012 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1013 return; 1014 } 1015 1016 // default case -> offset 1017 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1018 Ptr = Addr; 1019 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1020 } 1021 1022 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1023 SDValue &VAddr, SDValue &SOffset, 1024 SDValue &Offset, SDValue &GLC, 1025 SDValue &SLC, SDValue &TFE) const { 1026 SDValue Ptr, Offen, Idxen, Addr64; 1027 1028 // addr64 bit was removed for volcanic islands. 1029 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 1030 return false; 1031 1032 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1033 GLC, SLC, TFE); 1034 1035 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1036 if (C->getSExtValue()) { 1037 SDLoc DL(Addr); 1038 1039 const SITargetLowering& Lowering = 1040 *static_cast<const SITargetLowering*>(getTargetLowering()); 1041 1042 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1043 return true; 1044 } 1045 1046 return false; 1047 } 1048 1049 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1050 SDValue &VAddr, SDValue &SOffset, 1051 SDValue &Offset, 1052 SDValue &SLC) const { 1053 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1054 SDValue GLC, TFE; 1055 1056 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1057 } 1058 1059 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 1060 SDValue &VAddr, SDValue &SOffset, 1061 SDValue &ImmOffset) const { 1062 1063 SDLoc DL(Addr); 1064 MachineFunction &MF = CurDAG->getMachineFunction(); 1065 const SIRegisterInfo *TRI = 1066 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); 1067 MachineRegisterInfo &MRI = MF.getRegInfo(); 1068 const SITargetLowering& Lowering = 1069 *static_cast<const SITargetLowering*>(getTargetLowering()); 1070 1071 unsigned ScratchOffsetReg = 1072 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); 1073 Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, 1074 ScratchOffsetReg, MVT::i32); 1075 SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); 1076 SDValue ScratchRsrcDword0 = 1077 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); 1078 1079 SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); 1080 SDValue ScratchRsrcDword1 = 1081 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); 1082 1083 const SDValue RsrcOps[] = { 1084 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 1085 ScratchRsrcDword0, 1086 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1087 ScratchRsrcDword1, 1088 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32), 1089 }; 1090 SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, 1091 MVT::v2i32, RsrcOps), 0); 1092 Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); 1093 SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 1094 MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); 1095 1096 // (add n0, c1) 1097 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1098 SDValue N0 = Addr.getOperand(0); 1099 SDValue N1 = Addr.getOperand(1); 1100 // Offsets in vaddr must be positive. 1101 if (CurDAG->SignBitIsZero(N0)) { 1102 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1103 if (isLegalMUBUFImmOffset(C1)) { 1104 VAddr = N0; 1105 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1106 return true; 1107 } 1108 } 1109 } 1110 1111 // (node) 1112 VAddr = Addr; 1113 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1114 return true; 1115 } 1116 1117 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1118 SDValue &SOffset, SDValue &Offset, 1119 SDValue &GLC, SDValue &SLC, 1120 SDValue &TFE) const { 1121 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1122 const SIInstrInfo *TII = 1123 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1124 1125 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1126 GLC, SLC, TFE); 1127 1128 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1129 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1130 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1131 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1132 APInt::getAllOnesValue(32).getZExtValue(); // Size 1133 SDLoc DL(Addr); 1134 1135 const SITargetLowering& Lowering = 1136 *static_cast<const SITargetLowering*>(getTargetLowering()); 1137 1138 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1139 return true; 1140 } 1141 return false; 1142 } 1143 1144 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1145 SDValue &Soffset, SDValue &Offset, 1146 SDValue &GLC) const { 1147 SDValue SLC, TFE; 1148 1149 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1150 } 1151 1152 /// 1153 /// \param EncodedOffset This is the immediate value that will be encoded 1154 /// directly into the instruction. On SI/CI the \p EncodedOffset 1155 /// will be in units of dwords and on VI+ it will be units of bytes. 1156 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1157 int64_t EncodedOffset) { 1158 return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1159 isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1160 } 1161 1162 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1163 SDValue &Offset, bool &Imm) const { 1164 1165 // FIXME: Handle non-constant offsets. 1166 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1167 if (!C) 1168 return false; 1169 1170 SDLoc SL(ByteOffsetNode); 1171 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1172 int64_t ByteOffset = C->getSExtValue(); 1173 int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1174 ByteOffset >> 2 : ByteOffset; 1175 1176 if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1177 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1178 Imm = true; 1179 return true; 1180 } 1181 1182 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1183 return false; 1184 1185 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1186 // 32-bit Immediates are supported on Sea Islands. 1187 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1188 } else { 1189 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1190 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1191 C32Bit), 0); 1192 } 1193 Imm = false; 1194 return true; 1195 } 1196 1197 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1198 SDValue &Offset, bool &Imm) const { 1199 1200 SDLoc SL(Addr); 1201 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1202 SDValue N0 = Addr.getOperand(0); 1203 SDValue N1 = Addr.getOperand(1); 1204 1205 if (SelectSMRDOffset(N1, Offset, Imm)) { 1206 SBase = N0; 1207 return true; 1208 } 1209 } 1210 SBase = Addr; 1211 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1212 Imm = true; 1213 return true; 1214 } 1215 1216 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1217 SDValue &Offset) const { 1218 bool Imm; 1219 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1220 } 1221 1222 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1223 SDValue &Offset) const { 1224 1225 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1226 return false; 1227 1228 bool Imm; 1229 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1230 return false; 1231 1232 return !Imm && isa<ConstantSDNode>(Offset); 1233 } 1234 1235 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1236 SDValue &Offset) const { 1237 bool Imm; 1238 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1239 !isa<ConstantSDNode>(Offset); 1240 } 1241 1242 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1243 SDValue &Offset) const { 1244 bool Imm; 1245 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1246 } 1247 1248 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1249 SDValue &Offset) const { 1250 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1251 return false; 1252 1253 bool Imm; 1254 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1255 return false; 1256 1257 return !Imm && isa<ConstantSDNode>(Offset); 1258 } 1259 1260 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1261 SDValue &Offset) const { 1262 bool Imm; 1263 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1264 !isa<ConstantSDNode>(Offset); 1265 } 1266 1267 // FIXME: This is incorrect and only enough to be able to compile. 1268 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 1269 AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); 1270 SDLoc DL(N); 1271 1272 assert(Subtarget->hasFlatAddressSpace() && 1273 "addrspacecast only supported with flat address space!"); 1274 1275 assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && 1276 ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) && 1277 "Cannot cast address space to / from constant address!"); 1278 1279 assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || 1280 ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && 1281 "Can only cast to / from flat address space!"); 1282 1283 // The flat instructions read the address as the index of the VGPR holding the 1284 // address, so casting should just be reinterpreting the base VGPR, so just 1285 // insert trunc / bitcast / zext. 1286 1287 SDValue Src = ASC->getOperand(0); 1288 EVT DestVT = ASC->getValueType(0); 1289 EVT SrcVT = Src.getValueType(); 1290 1291 unsigned SrcSize = SrcVT.getSizeInBits(); 1292 unsigned DestSize = DestVT.getSizeInBits(); 1293 1294 if (SrcSize > DestSize) { 1295 assert(SrcSize == 64 && DestSize == 32); 1296 return CurDAG->getMachineNode( 1297 TargetOpcode::EXTRACT_SUBREG, 1298 DL, 1299 DestVT, 1300 Src, 1301 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); 1302 } 1303 1304 if (DestSize > SrcSize) { 1305 assert(SrcSize == 32 && DestSize == 64); 1306 1307 // FIXME: This is probably wrong, we should never be defining 1308 // a register class with both VGPRs and SGPRs 1309 SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL, 1310 MVT::i32); 1311 1312 const SDValue Ops[] = { 1313 RC, 1314 Src, 1315 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1316 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1317 CurDAG->getConstant(0, DL, MVT::i32)), 0), 1318 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 1319 }; 1320 1321 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 1322 DL, N->getValueType(0), Ops); 1323 } 1324 1325 assert(SrcSize == 64 && DestSize == 64); 1326 return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode(); 1327 } 1328 1329 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 1330 uint32_t Offset, uint32_t Width) { 1331 // Transformation function, pack the offset and width of a BFE into 1332 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1333 // source, bits [5:0] contain the offset and bits [22:16] the width. 1334 uint32_t PackedVal = Offset | (Width << 16); 1335 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1336 1337 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1338 } 1339 1340 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1341 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1342 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1343 // Predicate: 0 < b <= c < 32 1344 1345 const SDValue &Shl = N->getOperand(0); 1346 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1347 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1348 1349 if (B && C) { 1350 uint32_t BVal = B->getZExtValue(); 1351 uint32_t CVal = C->getZExtValue(); 1352 1353 if (0 < BVal && BVal <= CVal && CVal < 32) { 1354 bool Signed = N->getOpcode() == ISD::SRA; 1355 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1356 1357 return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), 1358 CVal - BVal, 32 - CVal); 1359 } 1360 } 1361 return SelectCode(N); 1362 } 1363 1364 SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1365 switch (N->getOpcode()) { 1366 case ISD::AND: 1367 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1368 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1369 // Predicate: isMask(mask) 1370 const SDValue &Srl = N->getOperand(0); 1371 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1372 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1373 1374 if (Shift && Mask) { 1375 uint32_t ShiftVal = Shift->getZExtValue(); 1376 uint32_t MaskVal = Mask->getZExtValue(); 1377 1378 if (isMask_32(MaskVal)) { 1379 uint32_t WidthVal = countPopulation(MaskVal); 1380 1381 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0), 1382 ShiftVal, WidthVal); 1383 } 1384 } 1385 } 1386 break; 1387 case ISD::SRL: 1388 if (N->getOperand(0).getOpcode() == ISD::AND) { 1389 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1390 // Predicate: isMask(mask >> b) 1391 const SDValue &And = N->getOperand(0); 1392 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1393 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1394 1395 if (Shift && Mask) { 1396 uint32_t ShiftVal = Shift->getZExtValue(); 1397 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1398 1399 if (isMask_32(MaskVal)) { 1400 uint32_t WidthVal = countPopulation(MaskVal); 1401 1402 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0), 1403 ShiftVal, WidthVal); 1404 } 1405 } 1406 } else if (N->getOperand(0).getOpcode() == ISD::SHL) 1407 return SelectS_BFEFromShifts(N); 1408 break; 1409 case ISD::SRA: 1410 if (N->getOperand(0).getOpcode() == ISD::SHL) 1411 return SelectS_BFEFromShifts(N); 1412 break; 1413 } 1414 1415 return SelectCode(N); 1416 } 1417 1418 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1419 SDValue &SrcMods) const { 1420 1421 unsigned Mods = 0; 1422 1423 Src = In; 1424 1425 if (Src.getOpcode() == ISD::FNEG) { 1426 Mods |= SISrcMods::NEG; 1427 Src = Src.getOperand(0); 1428 } 1429 1430 if (Src.getOpcode() == ISD::FABS) { 1431 Mods |= SISrcMods::ABS; 1432 Src = Src.getOperand(0); 1433 } 1434 1435 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1436 1437 return true; 1438 } 1439 1440 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1441 SDValue &SrcMods) const { 1442 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1443 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1444 } 1445 1446 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1447 SDValue &SrcMods, SDValue &Clamp, 1448 SDValue &Omod) const { 1449 SDLoc DL(In); 1450 // FIXME: Handle Clamp and Omod 1451 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1452 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1453 1454 return SelectVOP3Mods(In, Src, SrcMods); 1455 } 1456 1457 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1458 SDValue &SrcMods, SDValue &Clamp, 1459 SDValue &Omod) const { 1460 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1461 1462 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1463 cast<ConstantSDNode>(Clamp)->isNullValue() && 1464 cast<ConstantSDNode>(Omod)->isNullValue(); 1465 } 1466 1467 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1468 SDValue &SrcMods, 1469 SDValue &Omod) const { 1470 // FIXME: Handle Omod 1471 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1472 1473 return SelectVOP3Mods(In, Src, SrcMods); 1474 } 1475 1476 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1477 SDValue &SrcMods, 1478 SDValue &Clamp, 1479 SDValue &Omod) const { 1480 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1481 return SelectVOP3Mods(In, Src, SrcMods); 1482 } 1483 1484 void AMDGPUDAGToDAGISel::PreprocessISelDAG() { 1485 bool Modified = false; 1486 1487 // XXX - Other targets seem to be able to do this without a worklist. 1488 SmallVector<LoadSDNode *, 8> LoadsToReplace; 1489 SmallVector<StoreSDNode *, 8> StoresToReplace; 1490 1491 for (SDNode &Node : CurDAG->allnodes()) { 1492 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) { 1493 EVT VT = LD->getValueType(0); 1494 if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) 1495 continue; 1496 1497 // To simplify the TableGen patters, we replace all i64 loads with v2i32 1498 // loads. Alternatively, we could promote i64 loads to v2i32 during DAG 1499 // legalization, however, so places (ExpandUnalignedLoad) in the DAG 1500 // legalizer assume that if i64 is legal, so doing this promotion early 1501 // can cause problems. 1502 LoadsToReplace.push_back(LD); 1503 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) { 1504 // Handle i64 stores here for the same reason mentioned above for loads. 1505 SDValue Value = ST->getValue(); 1506 if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore()) 1507 continue; 1508 StoresToReplace.push_back(ST); 1509 } 1510 } 1511 1512 for (LoadSDNode *LD : LoadsToReplace) { 1513 SDLoc SL(LD); 1514 1515 SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(), 1516 LD->getBasePtr(), LD->getMemOperand()); 1517 SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, 1518 MVT::i64, NewLoad); 1519 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); 1520 CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast); 1521 Modified = true; 1522 } 1523 1524 for (StoreSDNode *ST : StoresToReplace) { 1525 SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST), 1526 MVT::v2i32, ST->getValue()); 1527 const SDValue StoreOps[] = { 1528 ST->getChain(), 1529 NewValue, 1530 ST->getBasePtr(), 1531 ST->getOffset() 1532 }; 1533 1534 CurDAG->UpdateNodeOperands(ST, StoreOps); 1535 Modified = true; 1536 } 1537 1538 // XXX - Is this necessary? 1539 if (Modified) 1540 CurDAG->RemoveDeadNodes(); 1541 } 1542 1543 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1544 const AMDGPUTargetLowering& Lowering = 1545 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1546 bool IsModified = false; 1547 do { 1548 IsModified = false; 1549 // Go over all selected nodes and try to fold them a bit more 1550 for (SDNode &Node : CurDAG->allnodes()) { 1551 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1552 if (!MachineNode) 1553 continue; 1554 1555 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1556 if (ResNode != &Node) { 1557 ReplaceUses(&Node, ResNode); 1558 IsModified = true; 1559 } 1560 } 1561 CurDAG->RemoveDeadNodes(); 1562 } while (IsModified); 1563 } 1564