//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

namespace llvm {
class R600InstrInfo;
}

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
    : SelectionDAGISel(TM, OptLevel) {}

  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

private:
  SDValue foldFrameIndex(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  bool isConstantLoad(const MemSDNode *N, int CbId) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
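  // Matchers for MUBUF buffer addressing modes. These decompose an address
  // into the SRsrc/VAddr/SOffset/Offset operands expected by buffer
  // instructions.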
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlat(SDValue Addr, SDValue &VAddr,
                  SDValue &SLC, SDValue &TFE) const;

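  // Matchers for scalar memory (SMRD) addressing. Depending on the subtarget,
  // the offset is encoded as an immediate or materialized in an SGPR.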
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII
    = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// \brief Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

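// Top-level selection hook. Opcodes that need custom handling are selected
// here; everything else falls through to the TableGen'erated matcher via
// SelectCode().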
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom-lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUB:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as
      // much as possible because they can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;

  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

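// Returns true for loads from the constant address space. For R600, a
// non-negative CbId restricts the match to one specific constant buffer.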
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;

  return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

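// Lowers a 64-bit add/sub into two 32-bit scalar ops: the low halves use
// S_ADD_U32/S_SUB_U32 (or the carry-in forms for ADDE/SUBE), the high halves
// use S_ADDC_U32/S_SUBB_U32 with the glued carry, and the halves are
// recombined with a REG_SEQUENCE.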
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd =
      (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  CurDAG->ReplaceAllUsesWith(N, RegSequence);
  CurDAG->RemoveDeadNode(N);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

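// Checks that \p Offset fits the unsigned offset field of a DS instruction
// (\p OffsetBits is 16 for a single offset, 8 for the offset0/offset1 pair)
// and, where required, that the base pointer is known to be non-negative.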
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

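// Matches the dword offset pair used by read2/write2-style DS instructions:
// the byte offset is converted to dword units, with offset1 fixed at
// offset0 + 1.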
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

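// Common MUBUF address decomposition: selects between the addr64 form
// (variable pointer in VAddr) and the plain offset form, splitting out a
// legal 12-bit immediate offset where possible and spilling larger constants
// into SOffset.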
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // The subtarget prefers to use flat instructions for global accesses.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

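// Addr64 form of the MUBUF matcher. Only usable before Volcanic Islands,
// where the addr64 bit still exists; wraps the pointer in an addr64 rsrc
// descriptor.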
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  if (auto FI = dyn_cast<FrameIndexSDNode>(N))
    return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
  return N;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

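// Offset-only form of the MUBUF matcher: succeeds when the address needs
// neither offen, idxen, nor addr64, building a default rsrc descriptor
// around the pointer.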
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
                                    SDValue &VAddr,
                                    SDValue &SLC,
                                    SDValue &TFE) const {
  VAddr = Addr;
  TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);
  return true;
}

/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset will be in
/// units of dwords and on VI+ it will be in units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

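// Tries to select and/shift/sign-extend patterns as a single S_BFE bitfield
// extract; falls back to the default matcher when no pattern applies.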
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

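// A BRCOND can be selected to S_CBRANCH_SCC* only when its condition is a
// single-use SETCC on i32, or on i64 with an equality predicate on
// subtargets that have 64-bit scalar compares.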
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const SISubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR. This means that the value '1' is always
  // written to the corresponding bit for results that are masked. In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
      CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                             CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                             Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0),  // Chain
                       VCC.getValue(1)); // Glue
  return;
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 :
          AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET :
          AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

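// VOP3 source-modifier matchers: peel fneg/fabs off the source and record
// them in the NEG/ABS bits of the src_modifiers operand. Clamp and omod are
// not yet matched and default to zero.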
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}