//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

namespace llvm {
class R600InstrInfo;
}

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

// Returns true if BRCOND node \p N branches on a condition that is a
// single-use i32 SETCC (possibly forwarded through a CopyToReg).  Both the
// branch and the compare must have one use so the pair can be selected
// together.
static bool isCBranchSCC(const SDNode *N) {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  // Operand 1 of BRCOND is the condition; look through a CopyToReg to the
  // value actually being copied.
  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);
  return Cond.getOpcode() == ISD::SETCC &&
         Cond.getOperand(0).getValueType() == MVT::i32 &&
         Cond.hasOneUse();
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  // Refreshed per-function in runOnMachineFunction().
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  // True if constant node \p N can be encoded as an inline immediate.
  bool isInlineImmediate(SDNode *N) const;

  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers for memory access nodes.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  // True if the terminator of the current block carries "amdgpu.uniform"
  // metadata (i.e. the branch condition is known uniform across the wave).
  bool isUniformBr(const SDNode *N) const;

  // For local-memory accesses, glue a CopyToReg of M0 onto \p N.
  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;

  SDNode *SelectAddrSpaceCast(SDNode *N);

  // VOP3 source-modifier (neg/abs/clamp/omod) selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom selection for nodes tablegen cannot handle directly.
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

// Cache the per-function subtarget before running the generic
// SelectionDAGISel machinery.
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

// A constant is an inline immediate when SI immediate analysis reports that
// no extra instructions are needed to materialize it.
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    // Generic case: read the register class out of the MCInstrDesc operand
    // info, skipping over the instruction's defs.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operands: op0 is the super-register class id, then
    // (value, subreg-index) pairs.  Derive the operand's class from the
    // super-class constrained to its subreg index.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

// Split \p Addr into a base (R1) and offset (R2) for parameter accesses.
// Non-add addresses get a zero offset.
bool AMDGPUDAGToDAGISel::SelectADDRParam(
  SDValue Addr, SDValue& R1, SDValue& R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

// Like SelectADDRParam, but rejects symbolic addresses.
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

// 64-bit variant of SelectADDR: same structure with i64 frame indexes and
// offsets.  Rejects symbolic addresses.
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

// For SI+ local-address memory nodes, append a glued copy of -1 into M0 so
// the hardware bounds check is effectively disabled.  Other nodes are
// returned unchanged.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  // Re-create N's operand list with the glue appended, and morph N in place.
  SmallVector <SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

// Map a vector element count to the matching SGPR register class id.
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

// Top-level selection hook: handles target-custom cases, then falls back to
// the tablegen-generated SelectCode().
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomics on local memory need M0 initialized (see glueCopyToM0).
  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    // Select vector construction to a REG_SEQUENCE of the element values.
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      // Degenerate one-element vector: just constrain the scalar's class.
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    // Select i64/i128 pair construction to a REG_SEQUENCE of the two halves.
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize non-inline 64-bit constants as two 32-bit S_MOV_B32s
    // combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory loads/stores also need the M0 setup; then fall through to
    // the generated matcher.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    // Try to match bitfield-extract patterns into S_BFE on SI+.
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  return SelectCode(N);
}

// True if \p Ptr points into address space \p AS.  A null pointer value
// (e.g. a pseudo source value) never matches.
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

// True if the mem operand refers to private (scratch) memory.  Pseudo source
// values (stack objects, constant pool, etc.) are treated as private.
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

// A store is private when it is not local, global, or region.
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

// CbId == -1 matches any constant-address load; otherwise match the specific
// constant buffer address space.
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

// Constant-address loads count as global when the subtarget cannot use SMRD
// for them (pre-SI, or loads narrower than 32 bits).
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

// True if \p N loads from the constant pool (a private pseudo source value
// marked as constant pool).
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

// A load is private when it is not in any other known address space and is
// not a constant pool / constant load disguised as private.
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
}

// A branch is uniform when its block terminator carries the "amdgpu.uniform"
// metadata placed by earlier analysis.
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  return BB->getTerminator()->getMetadata("amdgpu.uniform");
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

// Match a constant address; the emitted pointer is the address in dwords
// (byte address divided by 4).
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

// Match any non-constant address as base register plus zero offset.
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
    SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

// Match base + 16-bit signed immediate offset for R600 vertex fetch reads.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

// Match addressing for indirect (register-indexed) access: constant,
// base+constant (also accepting OR as an add), or plain base.
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

// Select 64-bit add/sub as a 32-bit add/sub plus add-with-carry on the two
// halves, recombined with REG_SEQUENCE (scalar ALU has no 64-bit add).
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  // Low halves produce the carry (glue) consumed by the high-half op.
  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

// A DS offset is legal when it fits the instruction's unsigned offset field
// and, on Southern Islands (unless unsafe folding is enabled), the base is
// provably non-negative.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

// Match a DS address as base plus a 16-bit unsigned immediate offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
/// Match an address for a DS read2/write2 (64-bit, 4-byte-aligned) access.
/// Produces a \p Base register plus two 8-bit offsets \p Offset0 / \p Offset1
/// measured in dwords (the constant byte offset is divided by 4). Always
/// returns true: the fallback uses the raw address with offsets {0, 1}.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are in dword units; the second slot is one dword past the first.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          // Emit the negation as a machine node so the base is already
          // selected; the ISD::SUB above is discarded.
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: fold the whole address into the offsets and use
    // a zero base, so multiple accesses can share one base register.
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

/// MUBUF immediate offsets are an unsigned 12-bit field.
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

/// Decompose \p Addr into the full set of MUBUF operands (pointer, vaddr,
/// soffset, immediate offset, and the offen/idxen/addr64/glc/slc/tfe flags).
/// Returns false when the subtarget prefers flat instructions for global
/// accesses; otherwise always succeeds with some decomposition.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default all flags to 0; the matching below only raises Addr64.
  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
    // NOTE(review): an offset that does not fit in 32 bits falls through to
    // the plain-ADD / default handling below.
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

/// Match a MUBUF addr64-form access: only valid before Volcanic Islands
/// (the addr64 bit was removed there), and only when SelectMUBUF chose the
/// addr64 decomposition. Wraps the pointer into a 64-bit resource descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

/// Convenience overload: same as above but with GLC/TFE discarded and SLC
/// forced to 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

/// Match a scratch (private) access: rsrc and soffset come from the function's
/// scratch resource / wave offset registers; the address is split into vaddr
/// plus a legal immediate offset when possible. Always returns true.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    // SignBitIsZero proves the split-off base cannot be negative.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1) && CurDAG->SignBitIsZero(N0)) {
      VAddr = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

/// Match a MUBUF offset-only access (no offen/idxen/addr64). Builds a full
/// resource descriptor around the pointer with the default data format.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  // Only usable when SelectMUBUF raised none of offen/idxen/addr64.
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

/// Convenience overload discarding the SLC/TFE flags.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be units of bytes.
/// Return true if \p EncodedOffset fits the SMRD immediate field:
/// 8 bits before Volcanic Islands, 20 bits on VI and newer.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
    isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

/// Select the offset operand of an SMRD access from \p ByteOffsetNode.
/// On success sets \p Imm to true when \p Offset is an immediate encoded in
/// the instruction, false when it is carried in an SGPR (or a CI 32-bit
/// literal). Only constant offsets are handled.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  // SI/CI encode the offset in dwords, VI+ in bytes.
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  // NOTE(review): isUInt<32>(EncodedOffset) here is redundant — it was
  // already guaranteed by the guard above.
  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Materialize the byte offset into an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

/// Split \p Addr into an SMRD base register plus offset. Falls back to the
/// whole address with a zero immediate offset, so this always succeeds.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

/// SMRD with an encodable immediate offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

/// SMRD with a 32-bit literal offset — a Sea Islands-only encoding.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  // Non-immediate but still a constant node means the CI literal form.
  return !Imm && isa<ConstantSDNode>(Offset);
}

/// SMRD with the offset in an SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

/// Buffer-load variants of the three SMRD offset forms above; these select
/// only the offset operand (no base register).
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// FIXME: This is incorrect and only enough to be able to compile.
// Emits a diagnostic, then lowers the cast as a plain integer resize:
// 64->32 extracts sub0, 32->64 zero-extends via a REG_SEQUENCE, 64->64 is a
// bitcast.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(
      *MF.getFunction(), "addrspacecast not implemented", DL.getDebugLoc());
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.

  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    // Truncate: take the low half of the 64-bit pointer.
    return CurDAG->getMachineNode(
      TargetOpcode::EXTRACT_SUBREG,
      DL,
      DestVT,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    // Zero-extend: pair the 32-bit source with a zero high half.
    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

/// Build an S_BFE machine node extracting \p Width bits starting at
/// \p Offset from \p Val.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA keeps the sign, so use the signed BFE form.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  // No match: fall back to normal table-generated selection.
  return SelectCode(N);
}

/// Try to select AND/SRL/SRA patterns as an S_BFE bit-field extract;
/// falls back to SelectCode when no pattern matches.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}

/// Select a BRCOND. SETCC-based conditions go through tablegen
/// (S_CBRANCH_SCC*); anything else is masked with EXEC and branched on VCC.
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR. This means that the value '1' is always
  // written to the corresponding bit for results that are masked. In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  // NOTE(review): the S_AND_B64 / EXEC register are given MVT::i1 here —
  // presumably relying on the i1 == 64-bit lane-mask convention; confirm
  // against SIRegisterInfo.
  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}

/// Peel FNEG/FABS off \p In and encode them as VOP3 source modifiers.
/// Always succeeds; \p Src is the stripped value, \p SrcMods the mod bits.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

/// Like SelectVOP3Mods, but only matches when no modifiers are present.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

/// VOP3 source 0: modifiers plus clamp/omod operands (currently always 0).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Source-0 variant that only matches with no modifiers, clamp, or omod.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
                cast<ConstantSDNode>(Clamp)->isNullValue() &&
                cast<ConstantSDNode>(Omod)->isNullValue();
}

/// Modifiers with a zero omod operand (clamp handled by the pattern).
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Modifiers with both clamp and omod forced to zero.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Pre-selection DAG rewrite: replace non-extending i64 loads and
/// non-truncating i64 stores with v2i32 equivalents (plus bitcasts) to keep
/// the TableGen patterns simple.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  // XXX - Other targets seem to be able to do this without a worklist.
  // Collect first, then mutate, so the allnodes iteration stays valid.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patters, we replace all i64 loads with v2i32
      // loads.  Alternatively, we could promote i64 loads to v2i32 during DAG
      // legalization, however, so places (ExpandUnalignedLoad) in the DAG
      // legalizer assume that if i64 is legal, so doing this promotion early
      // can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    // Rewire the chain first, then the loaded value.
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

/// Post-selection pass: repeatedly run the target's PostISelFolding over all
/// machine nodes until a fixed point is reached.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}