//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned int AddrSpace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch (NumVectorElts) {
      case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
                                     AMDGPU::SReg_32RegClassID;
        break;
      case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
                                     AMDGPU::SReg_64RegClassID;
        break;
      case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
                                     AMDGPU::SReg_128RegClassID;
        break;
      case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
                                     AMDGPU::SReg_256RegClassID;
        break;
      case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
                                      AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit reg copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
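      // For example, a <2 x i32> BUILD_VECTOR here becomes a single
      // REG_SEQUENCE into R600_Reg64 instead of an IMPLICIT_DEF plus
      // per-element INSERT_SUBREGs.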
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }
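    // Materialize the 64-bit immediate as two 32-bit S_MOV_B32s (low and high
    // halves) and recombine them with a REG_SEQUENCE into an SReg_64.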
    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }
  case AMDGPUISD::REGISTER_LOAD: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SDLoc DL(N);
    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    SDLoc DL(N);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  return SelectCode(N);
}

bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
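
// A constant-pool load carries a PseudoSourceValue on its MachineMemOperand
// rather than an IR Value, so it is detected separately from the
// address-space checks used by the predicates above.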
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
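// V_DIV_SCALE produces two results, the scaled value (f32/f64) and an i1
// condition flag, which is why SelectNodeTo below is given both VT and
// MVT::i1.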
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
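// The two 8-bit offsets selected here are in dword units, matching the
// ds_read2_b32 / ds_write2_b32 style encoding; e.g. a constant byte offset
// of 8 becomes Offset0 = 2 and Offset1 = 3.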
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

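    // MUBUF immediate offsets are 12-bit unsigned (isLegalMUBUFImmOffset);
    // larger offsets that still fit in 32 bits are materialized into SOffset
    // with an S_MOV_B32 below.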
    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL,
                                                    MVT::i32)),
                        0);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                ScratchOffsetReg, MVT::i32);
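  // Build the scratch resource descriptor from the SCRATCH_RSRC_DWORD0/1
  // external symbols; these are resolved outside of instruction selection,
  // and buildScratchRSRC presumably fills in the remaining descriptor fields.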
  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
  SDValue ScratchRsrcDword0 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);

  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
  SDValue ScratchRsrcDword1 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);

  const SDValue RsrcOps[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    ScratchRsrcDword0,
    CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
    ScratchRsrcDword1,
    CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
  };
  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                      MVT::v2i32, RsrcOps), 0);
  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
                                   MRI.getLiveInVirtReg(ScratchOffsetReg),
                                   MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
    if (CurDAG->SignBitIsZero(N0)) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
      if (isLegalMUBUFImmOffset(C1)) {
        VAddr = N0;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
      }
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
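  // e.g. a byte offset of 16 is encoded as 4 (dwords) on SI/CI but stays 16
  // (bytes) on VI+.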
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// FIXME: This is incorrect and only enough to be able to compile.
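// The current behavior is a plain integer resize of the pointer: 64 -> 32
// extracts sub0, 32 -> 64 zero-extends via a REG_SEQUENCE with an S_MOV_B32 0
// in the high half, and 64 -> 64 is a bitcast.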
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
         "Cannot cast address space to / from constant address!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address from the VGPR(s) holding it, so
  // casting should just reinterpret the base VGPR; insert a trunc / bitcast /
  // zext accordingly.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
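  // e.g. Offset = 8, Width = 5 packs to (5 << 16) | 8 = 0x50008.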
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  return SelectCode(N);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
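
// Strip fneg / fabs from the source and record them as VOP3 source-modifier
// bits; e.g. fneg(fabs(x)) selects Src = x with
// SrcMods = SISrcMods::NEG | SISrcMods::ABS.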
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
                cast<ConstantSDNode>(Clamp)->isNullValue() &&
                cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
      // during DAG legalization; however, some places in the DAG legalizer
      // (e.g. ExpandUnalignedLoad) assume that i64 loads stay i64 loads when
      // i64 is legal, so doing this promotion early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}