1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPUIntrinsicInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUSubtarget.h" 19 #include "SIISelLowering.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/CodeGen/FunctionLoweringInfo.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/PseudoSourceValue.h" 25 #include "llvm/CodeGen/SelectionDAG.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/IR/DiagnosticInfo.h" 28 29 using namespace llvm; 30 31 namespace llvm { 32 class R600InstrInfo; 33 } 34 35 //===----------------------------------------------------------------------===// 36 // Instruction Selector Implementation 37 //===----------------------------------------------------------------------===// 38 39 namespace { 40 41 /// AMDGPU specific code to select AMDGPU machine instructions for 42 /// SelectionDAG operations. 43 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 44 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 45 // make the right decision when generating code for different targets. 46 const AMDGPUSubtarget *Subtarget; 47 48 public: 49 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) 50 : SelectionDAGISel(TM, OptLevel) {} 51 52 virtual ~AMDGPUDAGToDAGISel(); 53 bool runOnMachineFunction(MachineFunction &MF) override; 54 void Select(SDNode *N) override; 55 StringRef getPassName() const override; 56 void PostprocessISelDAG() override; 57 58 private: 59 SDValue foldFrameIndex(SDValue N) const; 60 bool isInlineImmediate(const SDNode *N) const; 61 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 62 const R600InstrInfo *TII); 63 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 64 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 65 66 bool isConstantLoad(const MemSDNode *N, int cbID) const; 67 bool isUniformBr(const SDNode *N) const; 68 69 SDNode *glueCopyToM0(SDNode *N) const; 70 71 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 72 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 73 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 74 SDValue& Offset); 75 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 76 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 77 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 78 unsigned OffsetBits) const; 79 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 80 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 81 SDValue &Offset1) const; 82 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 83 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 84 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 85 SDValue &TFE) const; 86 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 87 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 88 SDValue &SLC, SDValue &TFE) const; 89 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 90 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 91 SDValue &SLC) const; 92 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 93 SDValue &SOffset, SDValue &ImmOffset) const; 94 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 95 SDValue &Offset, SDValue &GLC, SDValue &SLC, 96 SDValue &TFE) const; 97 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 98 SDValue &Offset, SDValue &SLC) const; 99 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 100 SDValue &Offset) const; 101 bool SelectMUBUFConstant(SDValue Constant, 102 SDValue &SOffset, 103 SDValue &ImmOffset) const; 104 bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset, 105 SDValue &ImmOffset) const; 106 bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, 107 SDValue &ImmOffset, SDValue &VOffset) const; 108 109 bool SelectFlat(SDValue Addr, SDValue &VAddr, 110 SDValue &SLC, SDValue &TFE) const; 111 112 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 113 bool &Imm) const; 114 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 115 bool &Imm) const; 116 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 117 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 118 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 119 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 120 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 121 bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 122 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; 123 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 124 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 125 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 126 SDValue &Clamp, SDValue &Omod) const; 127 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 128 SDValue &Clamp, SDValue &Omod) const; 129 130 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 131 SDValue &Omod) const; 132 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 133 SDValue &Clamp, 134 SDValue &Omod) const; 135 136 void SelectADD_SUB_I64(SDNode *N); 137 void SelectDIV_SCALE(SDNode *N); 138 139 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, 140 uint32_t Offset, uint32_t Width); 141 void SelectS_BFEFromShifts(SDNode *N); 142 void SelectS_BFE(SDNode *N); 143 bool isCBranchSCC(const SDNode *N) const; 144 void SelectBRCOND(SDNode *N); 145 void SelectATOMIC_CMP_SWAP(SDNode *N); 146 147 // Include the pieces autogenerated from the target description. 148 #include "AMDGPUGenDAGISel.inc" 149 }; 150 } // end anonymous namespace 151 152 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 153 // DAG, ready for instruction scheduling. 154 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM, 155 CodeGenOpt::Level OptLevel) { 156 return new AMDGPUDAGToDAGISel(TM, OptLevel); 157 } 158 159 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 160 Subtarget = &MF.getSubtarget<AMDGPUSubtarget>(); 161 return SelectionDAGISel::runOnMachineFunction(MF); 162 } 163 164 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 165 } 166 167 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const { 168 const SIInstrInfo *TII 169 = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo(); 170 171 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) 172 return TII->isInlineConstant(C->getAPIntValue()); 173 174 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) 175 return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); 176 177 return false; 178 } 179 180 /// \brief Determine the register class for \p OpNo 181 /// \returns The register class of the virtual register that will be used for 182 /// the given operand number \OpNo or NULL if the register class cannot be 183 /// determined. 184 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 185 unsigned OpNo) const { 186 if (!N->isMachineOpcode()) { 187 if (N->getOpcode() == ISD::CopyToReg) { 188 unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); 189 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 190 MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); 191 return MRI.getRegClass(Reg); 192 } 193 194 const SIRegisterInfo *TRI 195 = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo(); 196 return TRI->getPhysRegClass(Reg); 197 } 198 199 return nullptr; 200 } 201 202 switch (N->getMachineOpcode()) { 203 default: { 204 const MCInstrDesc &Desc = 205 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 206 unsigned OpIdx = Desc.getNumDefs() + OpNo; 207 if (OpIdx >= Desc.getNumOperands()) 208 return nullptr; 209 int RegClass = Desc.OpInfo[OpIdx].RegClass; 210 if (RegClass == -1) 211 return nullptr; 212 213 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 214 } 215 case AMDGPU::REG_SEQUENCE: { 216 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 217 const TargetRegisterClass *SuperRC = 218 Subtarget->getRegisterInfo()->getRegClass(RCID); 219 220 SDValue SubRegOp = N->getOperand(OpNo + 1); 221 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 222 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 223 SubRegIdx); 224 } 225 } 226 } 227 228 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 229 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 230 cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 231 return N; 232 233 const SITargetLowering& Lowering = 234 *static_cast<const SITargetLowering*>(getTargetLowering()); 235 236 // Write max value to m0 before each load operation 237 238 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 239 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 240 241 SDValue Glue = M0.getValue(1); 242 243 SmallVector <SDValue, 8> Ops; 244 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 245 Ops.push_back(N->getOperand(i)); 246 } 247 Ops.push_back(Glue); 248 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 249 250 return N; 251 } 252 253 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 254 switch (NumVectorElts) { 255 case 1: 256 return AMDGPU::SReg_32RegClassID; 257 case 2: 258 return AMDGPU::SReg_64RegClassID; 259 case 4: 260 return AMDGPU::SReg_128RegClassID; 261 case 8: 262 return AMDGPU::SReg_256RegClassID; 263 case 16: 264 return AMDGPU::SReg_512RegClassID; 265 } 266 267 llvm_unreachable("invalid vector size"); 268 } 269 270 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 271 unsigned int Opc = N->getOpcode(); 272 if (N->isMachineOpcode()) { 273 N->setNodeId(-1); 274 return; // Already selected. 275 } 276 277 if (isa<AtomicSDNode>(N) || 278 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 279 N = glueCopyToM0(N); 280 281 switch (Opc) { 282 default: break; 283 // We are selecting i64 ADD here instead of custom lower it during 284 // DAG legalization, so we can fold some i64 ADDs used for address 285 // calculation into the LOAD and STORE instructions. 286 case ISD::ADD: 287 case ISD::ADDC: 288 case ISD::ADDE: 289 case ISD::SUB: 290 case ISD::SUBC: 291 case ISD::SUBE: { 292 if (N->getValueType(0) != MVT::i64 || 293 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 294 break; 295 296 SelectADD_SUB_I64(N); 297 return; 298 } 299 case ISD::SCALAR_TO_VECTOR: 300 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 301 case ISD::BUILD_VECTOR: { 302 unsigned RegClassID; 303 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 304 EVT VT = N->getValueType(0); 305 unsigned NumVectorElts = VT.getVectorNumElements(); 306 EVT EltVT = VT.getVectorElementType(); 307 assert(EltVT.bitsEq(MVT::i32)); 308 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 309 RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 310 } else { 311 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 312 // that adds a 128 bits reg copy when going through TwoAddressInstructions 313 // pass. We want to avoid 128 bits copies as much as possible because they 314 // can't be bundled by our scheduler. 315 switch(NumVectorElts) { 316 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 317 case 4: 318 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 319 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 320 else 321 RegClassID = AMDGPU::R600_Reg128RegClassID; 322 break; 323 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 324 } 325 } 326 327 SDLoc DL(N); 328 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 329 330 if (NumVectorElts == 1) { 331 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 332 RegClass); 333 return; 334 } 335 336 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 337 "supported yet"); 338 // 16 = Max Num Vector Elements 339 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 340 // 1 = Vector Register Class 341 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 342 343 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 344 bool IsRegSeq = true; 345 unsigned NOps = N->getNumOperands(); 346 for (unsigned i = 0; i < NOps; i++) { 347 // XXX: Why is this here? 348 if (isa<RegisterSDNode>(N->getOperand(i))) { 349 IsRegSeq = false; 350 break; 351 } 352 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 353 RegSeqArgs[1 + (2 * i) + 1] = 354 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 355 MVT::i32); 356 } 357 358 if (NOps != NumVectorElts) { 359 // Fill in the missing undef elements if this was a scalar_to_vector. 360 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 361 362 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 363 DL, EltVT); 364 for (unsigned i = NOps; i < NumVectorElts; ++i) { 365 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 366 RegSeqArgs[1 + (2 * i) + 1] = 367 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 368 } 369 } 370 371 if (!IsRegSeq) 372 break; 373 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 374 return; 375 } 376 case ISD::BUILD_PAIR: { 377 SDValue RC, SubReg0, SubReg1; 378 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 379 break; 380 } 381 SDLoc DL(N); 382 if (N->getValueType(0) == MVT::i128) { 383 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 384 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 385 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 386 } else if (N->getValueType(0) == MVT::i64) { 387 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 388 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 389 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 390 } else { 391 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 392 } 393 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 394 N->getOperand(1), SubReg1 }; 395 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 396 N->getValueType(0), Ops)); 397 return; 398 } 399 400 case ISD::Constant: 401 case ISD::ConstantFP: { 402 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 403 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 404 break; 405 406 uint64_t Imm; 407 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 408 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 409 else { 410 ConstantSDNode *C = cast<ConstantSDNode>(N); 411 Imm = C->getZExtValue(); 412 } 413 414 SDLoc DL(N); 415 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 416 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 417 MVT::i32)); 418 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 419 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 420 const SDValue Ops[] = { 421 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 422 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 423 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 424 }; 425 426 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 427 N->getValueType(0), Ops)); 428 return; 429 } 430 case ISD::LOAD: 431 case ISD::STORE: { 432 N = glueCopyToM0(N); 433 break; 434 } 435 436 case AMDGPUISD::BFE_I32: 437 case AMDGPUISD::BFE_U32: { 438 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 439 break; 440 441 // There is a scalar version available, but unlike the vector version which 442 // has a separate operand for the offset and width, the scalar version packs 443 // the width and offset into a single operand. Try to move to the scalar 444 // version if the offsets are constant, so that we can try to keep extended 445 // loads of kernel arguments in SGPRs. 446 447 // TODO: Technically we could try to pattern match scalar bitshifts of 448 // dynamic values, but it's probably not useful. 449 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 450 if (!Offset) 451 break; 452 453 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 454 if (!Width) 455 break; 456 457 bool Signed = Opc == AMDGPUISD::BFE_I32; 458 459 uint32_t OffsetVal = Offset->getZExtValue(); 460 uint32_t WidthVal = Width->getZExtValue(); 461 462 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 463 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 464 return; 465 } 466 case AMDGPUISD::DIV_SCALE: { 467 SelectDIV_SCALE(N); 468 return; 469 } 470 case ISD::CopyToReg: { 471 const SITargetLowering& Lowering = 472 *static_cast<const SITargetLowering*>(getTargetLowering()); 473 Lowering.legalizeTargetIndependentNode(N, *CurDAG); 474 break; 475 } 476 case ISD::AND: 477 case ISD::SRL: 478 case ISD::SRA: 479 case ISD::SIGN_EXTEND_INREG: 480 if (N->getValueType(0) != MVT::i32 || 481 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 482 break; 483 484 SelectS_BFE(N); 485 return; 486 case ISD::BRCOND: 487 SelectBRCOND(N); 488 return; 489 490 case AMDGPUISD::ATOMIC_CMP_SWAP: 491 SelectATOMIC_CMP_SWAP(N); 492 return; 493 } 494 495 SelectCode(N); 496 } 497 498 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 499 if (!N->readMem()) 500 return false; 501 if (CbId == -1) 502 return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; 503 504 return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; 505 } 506 507 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 508 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 509 const Instruction *Term = BB->getTerminator(); 510 return Term->getMetadata("amdgpu.uniform") || 511 Term->getMetadata("structurizecfg.uniform"); 512 } 513 514 StringRef AMDGPUDAGToDAGISel::getPassName() const { 515 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 516 } 517 518 //===----------------------------------------------------------------------===// 519 // Complex Patterns 520 //===----------------------------------------------------------------------===// 521 522 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 523 SDValue& IntPtr) { 524 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 525 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 526 true); 527 return true; 528 } 529 return false; 530 } 531 532 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 533 SDValue& BaseReg, SDValue &Offset) { 534 if (!isa<ConstantSDNode>(Addr)) { 535 BaseReg = Addr; 536 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 537 return true; 538 } 539 return false; 540 } 541 542 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 543 SDValue &Offset) { 544 ConstantSDNode *IMMOffset; 545 546 if (Addr.getOpcode() == ISD::ADD 547 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 548 && isInt<16>(IMMOffset->getZExtValue())) { 549 550 Base = Addr.getOperand(0); 551 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 552 MVT::i32); 553 return true; 554 // If the pointer address is constant, we can move it to the offset field. 555 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 556 && isInt<16>(IMMOffset->getZExtValue())) { 557 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 558 SDLoc(CurDAG->getEntryNode()), 559 AMDGPU::ZERO, MVT::i32); 560 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 561 MVT::i32); 562 return true; 563 } 564 565 // Default case, no offset 566 Base = Addr; 567 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 568 return true; 569 } 570 571 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 572 SDValue &Offset) { 573 ConstantSDNode *C; 574 SDLoc DL(Addr); 575 576 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 577 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 578 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 579 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 580 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 581 Base = Addr.getOperand(0); 582 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 583 } else { 584 Base = Addr; 585 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 586 } 587 588 return true; 589 } 590 591 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 592 SDLoc DL(N); 593 SDValue LHS = N->getOperand(0); 594 SDValue RHS = N->getOperand(1); 595 596 unsigned Opcode = N->getOpcode(); 597 bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); 598 bool ProduceCarry = 599 ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; 600 bool IsAdd = 601 (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE); 602 603 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 604 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 605 606 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 607 DL, MVT::i32, LHS, Sub0); 608 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 609 DL, MVT::i32, LHS, Sub1); 610 611 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 612 DL, MVT::i32, RHS, Sub0); 613 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 614 DL, MVT::i32, RHS, Sub1); 615 616 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 617 618 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 619 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 620 621 SDNode *AddLo; 622 if (!ConsumeCarry) { 623 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 624 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); 625 } else { 626 SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; 627 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); 628 } 629 SDValue AddHiArgs[] = { 630 SDValue(Hi0, 0), 631 SDValue(Hi1, 0), 632 SDValue(AddLo, 1) 633 }; 634 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); 635 636 SDValue RegSequenceArgs[] = { 637 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 638 SDValue(AddLo,0), 639 Sub0, 640 SDValue(AddHi,0), 641 Sub1, 642 }; 643 SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, 644 MVT::i64, RegSequenceArgs); 645 646 if (ProduceCarry) { 647 // Replace the carry-use 648 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1)); 649 } 650 651 // Replace the remaining uses. 652 CurDAG->ReplaceAllUsesWith(N, RegSequence); 653 CurDAG->RemoveDeadNode(N); 654 } 655 656 // We need to handle this here because tablegen doesn't support matching 657 // instructions with multiple outputs. 658 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 659 SDLoc SL(N); 660 EVT VT = N->getValueType(0); 661 662 assert(VT == MVT::f32 || VT == MVT::f64); 663 664 unsigned Opc 665 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 666 667 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, 668 // omod 669 SDValue Ops[8]; 670 671 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 672 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 673 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 674 CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 675 } 676 677 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 678 unsigned OffsetBits) const { 679 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 680 (OffsetBits == 8 && !isUInt<8>(Offset))) 681 return false; 682 683 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 684 Subtarget->unsafeDSOffsetFoldingEnabled()) 685 return true; 686 687 // On Southern Islands instruction with a negative base value and an offset 688 // don't seem to work. 689 return CurDAG->SignBitIsZero(Base); 690 } 691 692 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 693 SDValue &Offset) const { 694 SDLoc DL(Addr); 695 if (CurDAG->isBaseWithConstantOffset(Addr)) { 696 SDValue N0 = Addr.getOperand(0); 697 SDValue N1 = Addr.getOperand(1); 698 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 699 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 700 // (add n0, c0) 701 Base = N0; 702 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 703 return true; 704 } 705 } else if (Addr.getOpcode() == ISD::SUB) { 706 // sub C, x -> add (sub 0, x), C 707 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 708 int64_t ByteOffset = C->getSExtValue(); 709 if (isUInt<16>(ByteOffset)) { 710 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 711 712 // XXX - This is kind of hacky. Create a dummy sub node so we can check 713 // the known bits in isDSOffsetLegal. We need to emit the selected node 714 // here, so this is thrown away. 715 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 716 Zero, Addr.getOperand(1)); 717 718 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 719 MachineSDNode *MachineSub 720 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 721 Zero, Addr.getOperand(1)); 722 723 Base = SDValue(MachineSub, 0); 724 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); 725 return true; 726 } 727 } 728 } 729 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 730 // If we have a constant address, prefer to put the constant into the 731 // offset. This can save moves to load the constant address since multiple 732 // operations can share the zero base address register, and enables merging 733 // into read2 / write2 instructions. 734 735 SDLoc DL(Addr); 736 737 if (isUInt<16>(CAddr->getZExtValue())) { 738 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 739 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 740 DL, MVT::i32, Zero); 741 Base = SDValue(MovZero, 0); 742 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 743 return true; 744 } 745 } 746 747 // default case 748 Base = Addr; 749 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 750 return true; 751 } 752 753 // TODO: If offset is too big, put low 16-bit into offset. 754 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 755 SDValue &Offset0, 756 SDValue &Offset1) const { 757 SDLoc DL(Addr); 758 759 if (CurDAG->isBaseWithConstantOffset(Addr)) { 760 SDValue N0 = Addr.getOperand(0); 761 SDValue N1 = Addr.getOperand(1); 762 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 763 unsigned DWordOffset0 = C1->getZExtValue() / 4; 764 unsigned DWordOffset1 = DWordOffset0 + 1; 765 // (add n0, c0) 766 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 767 Base = N0; 768 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 769 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 770 return true; 771 } 772 } else if (Addr.getOpcode() == ISD::SUB) { 773 // sub C, x -> add (sub 0, x), C 774 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 775 unsigned DWordOffset0 = C->getZExtValue() / 4; 776 unsigned DWordOffset1 = DWordOffset0 + 1; 777 778 if (isUInt<8>(DWordOffset0)) { 779 SDLoc DL(Addr); 780 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 781 782 // XXX - This is kind of hacky. Create a dummy sub node so we can check 783 // the known bits in isDSOffsetLegal. We need to emit the selected node 784 // here, so this is thrown away. 785 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 786 Zero, Addr.getOperand(1)); 787 788 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 789 MachineSDNode *MachineSub 790 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 791 Zero, Addr.getOperand(1)); 792 793 Base = SDValue(MachineSub, 0); 794 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 795 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 796 return true; 797 } 798 } 799 } 800 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 801 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 802 unsigned DWordOffset1 = DWordOffset0 + 1; 803 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 804 805 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 806 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 807 MachineSDNode *MovZero 808 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 809 DL, MVT::i32, Zero); 810 Base = SDValue(MovZero, 0); 811 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 812 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 813 return true; 814 } 815 } 816 817 // default case 818 819 // FIXME: This is broken on SI where we still need to check if the base 820 // pointer is positive here. 821 Base = Addr; 822 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 823 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 824 return true; 825 } 826 827 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 828 return isUInt<12>(Imm->getZExtValue()); 829 } 830 831 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 832 SDValue &VAddr, SDValue &SOffset, 833 SDValue &Offset, SDValue &Offen, 834 SDValue &Idxen, SDValue &Addr64, 835 SDValue &GLC, SDValue &SLC, 836 SDValue &TFE) const { 837 // Subtarget prefers to use flat instruction 838 if (Subtarget->useFlatForGlobal()) 839 return false; 840 841 SDLoc DL(Addr); 842 843 if (!GLC.getNode()) 844 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 845 if (!SLC.getNode()) 846 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 847 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 848 849 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 850 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 851 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 852 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 853 854 if (CurDAG->isBaseWithConstantOffset(Addr)) { 855 SDValue N0 = Addr.getOperand(0); 856 SDValue N1 = Addr.getOperand(1); 857 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 858 859 if (N0.getOpcode() == ISD::ADD) { 860 // (add (add N2, N3), C1) -> addr64 861 SDValue N2 = N0.getOperand(0); 862 SDValue N3 = N0.getOperand(1); 863 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 864 Ptr = N2; 865 VAddr = N3; 866 } else { 867 868 // (add N0, C1) -> offset 869 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 870 Ptr = N0; 871 } 872 873 if (isLegalMUBUFImmOffset(C1)) { 874 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 875 return true; 876 } 877 878 if (isUInt<32>(C1->getZExtValue())) { 879 // Illegal offset, store it in soffset. 880 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 881 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 882 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 883 0); 884 return true; 885 } 886 } 887 888 if (Addr.getOpcode() == ISD::ADD) { 889 // (add N0, N1) -> addr64 890 SDValue N0 = Addr.getOperand(0); 891 SDValue N1 = Addr.getOperand(1); 892 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 893 Ptr = N0; 894 VAddr = N1; 895 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 896 return true; 897 } 898 899 // default case -> offset 900 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 901 Ptr = Addr; 902 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 903 904 return true; 905 } 906 907 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 908 SDValue &VAddr, SDValue &SOffset, 909 SDValue &Offset, SDValue &GLC, 910 SDValue &SLC, SDValue &TFE) const { 911 SDValue Ptr, Offen, Idxen, Addr64; 912 913 // addr64 bit was removed for volcanic islands. 914 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 915 return false; 916 917 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 918 GLC, SLC, TFE)) 919 return false; 920 921 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 922 if (C->getSExtValue()) { 923 SDLoc DL(Addr); 924 925 const SITargetLowering& Lowering = 926 *static_cast<const SITargetLowering*>(getTargetLowering()); 927 928 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 929 return true; 930 } 931 932 return false; 933 } 934 935 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 936 SDValue &VAddr, SDValue &SOffset, 937 SDValue &Offset, 938 SDValue &SLC) const { 939 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 940 SDValue GLC, TFE; 941 942 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 943 } 944 945 SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { 946 if (auto FI = dyn_cast<FrameIndexSDNode>(N)) 947 return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); 948 return N; 949 } 950 951 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 952 SDValue &VAddr, SDValue &SOffset, 953 SDValue &ImmOffset) const { 954 955 SDLoc DL(Addr); 956 MachineFunction &MF = CurDAG->getMachineFunction(); 957 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 958 959 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 960 SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); 961 962 // (add n0, c1) 963 if (CurDAG->isBaseWithConstantOffset(Addr)) { 964 SDValue N0 = Addr.getOperand(0); 965 SDValue N1 = Addr.getOperand(1); 966 967 // Offsets in vaddr must be positive. 968 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 969 if (isLegalMUBUFImmOffset(C1)) { 970 VAddr = foldFrameIndex(N0); 971 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 972 return true; 973 } 974 } 975 976 // (node) 977 VAddr = foldFrameIndex(Addr); 978 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 979 return true; 980 } 981 982 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 983 SDValue &SOffset, SDValue &Offset, 984 SDValue &GLC, SDValue &SLC, 985 SDValue &TFE) const { 986 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 987 const SIInstrInfo *TII = 988 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 989 990 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 991 GLC, SLC, TFE)) 992 return false; 993 994 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 995 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 996 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 997 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 998 APInt::getAllOnesValue(32).getZExtValue(); // Size 999 SDLoc DL(Addr); 1000 1001 const SITargetLowering& Lowering = 1002 *static_cast<const SITargetLowering*>(getTargetLowering()); 1003 1004 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1005 return true; 1006 } 1007 return false; 1008 } 1009 1010 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1011 SDValue &Soffset, SDValue &Offset 1012 ) const { 1013 SDValue GLC, SLC, TFE; 1014 1015 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1016 } 1017 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1018 SDValue &Soffset, SDValue &Offset, 1019 SDValue &SLC) const { 1020 SDValue GLC, TFE; 1021 1022 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1023 } 1024 1025 bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, 1026 SDValue &SOffset, 1027 SDValue &ImmOffset) const { 1028 SDLoc DL(Constant); 1029 uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); 1030 uint32_t Overflow = 0; 1031 1032 if (Imm >= 4096) { 1033 if (Imm <= 4095 + 64) { 1034 // Use an SOffset inline constant for 1..64 1035 Overflow = Imm - 4095; 1036 Imm = 4095; 1037 } else { 1038 // Try to keep the same value in SOffset for adjacent loads, so that 1039 // the corresponding register contents can be re-used. 1040 // 1041 // Load values with all low-bits set into SOffset, so that a larger 1042 // range of values can be covered using s_movk_i32 1043 uint32_t High = (Imm + 1) & ~4095; 1044 uint32_t Low = (Imm + 1) & 4095; 1045 Imm = Low; 1046 Overflow = High - 1; 1047 } 1048 } 1049 1050 // There is a hardware bug in SI and CI which prevents address clamping in 1051 // MUBUF instructions from working correctly with SOffsets. The immediate 1052 // offset is unaffected. 1053 if (Overflow > 0 && 1054 Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) 1055 return false; 1056 1057 ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); 1058 1059 if (Overflow <= 64) 1060 SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); 1061 else 1062 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1063 CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), 1064 0); 1065 1066 return true; 1067 } 1068 1069 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, 1070 SDValue &SOffset, 1071 SDValue &ImmOffset) const { 1072 SDLoc DL(Offset); 1073 1074 if (!isa<ConstantSDNode>(Offset)) 1075 return false; 1076 1077 return SelectMUBUFConstant(Offset, SOffset, ImmOffset); 1078 } 1079 1080 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, 1081 SDValue &SOffset, 1082 SDValue &ImmOffset, 1083 SDValue &VOffset) const { 1084 SDLoc DL(Offset); 1085 1086 // Don't generate an unnecessary voffset for constant offsets. 1087 if (isa<ConstantSDNode>(Offset)) { 1088 SDValue Tmp1, Tmp2; 1089 1090 // When necessary, use a voffset in <= CI anyway to work around a hardware 1091 // bug. 1092 if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS || 1093 SelectMUBUFConstant(Offset, Tmp1, Tmp2)) 1094 return false; 1095 } 1096 1097 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1098 SDValue N0 = Offset.getOperand(0); 1099 SDValue N1 = Offset.getOperand(1); 1100 if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 && 1101 SelectMUBUFConstant(N1, SOffset, ImmOffset)) { 1102 VOffset = N0; 1103 return true; 1104 } 1105 } 1106 1107 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1108 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1109 VOffset = Offset; 1110 1111 return true; 1112 } 1113 1114 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, 1115 SDValue &VAddr, 1116 SDValue &SLC, 1117 SDValue &TFE) const { 1118 VAddr = Addr; 1119 TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1120 return true; 1121 } 1122 1123 /// 1124 /// \param EncodedOffset This is the immediate value that will be encoded 1125 /// directly into the instruction. On SI/CI the \p EncodedOffset 1126 /// will be in units of dwords and on VI+ it will be units of bytes. 1127 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1128 int64_t EncodedOffset) { 1129 return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1130 isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1131 } 1132 1133 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1134 SDValue &Offset, bool &Imm) const { 1135 1136 // FIXME: Handle non-constant offsets. 1137 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1138 if (!C) 1139 return false; 1140 1141 SDLoc SL(ByteOffsetNode); 1142 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1143 int64_t ByteOffset = C->getSExtValue(); 1144 int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1145 ByteOffset >> 2 : ByteOffset; 1146 1147 if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1148 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1149 Imm = true; 1150 return true; 1151 } 1152 1153 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1154 return false; 1155 1156 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1157 // 32-bit Immediates are supported on Sea Islands. 1158 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1159 } else { 1160 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1161 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1162 C32Bit), 0); 1163 } 1164 Imm = false; 1165 return true; 1166 } 1167 1168 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1169 SDValue &Offset, bool &Imm) const { 1170 1171 SDLoc SL(Addr); 1172 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1173 SDValue N0 = Addr.getOperand(0); 1174 SDValue N1 = Addr.getOperand(1); 1175 1176 if (SelectSMRDOffset(N1, Offset, Imm)) { 1177 SBase = N0; 1178 return true; 1179 } 1180 } 1181 SBase = Addr; 1182 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1183 Imm = true; 1184 return true; 1185 } 1186 1187 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1188 SDValue &Offset) const { 1189 bool Imm; 1190 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1191 } 1192 1193 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1194 SDValue &Offset) const { 1195 1196 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1197 return false; 1198 1199 bool Imm; 1200 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1201 return false; 1202 1203 return !Imm && isa<ConstantSDNode>(Offset); 1204 } 1205 1206 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1207 SDValue &Offset) const { 1208 bool Imm; 1209 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1210 !isa<ConstantSDNode>(Offset); 1211 } 1212 1213 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1214 SDValue &Offset) const { 1215 bool Imm; 1216 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1217 } 1218 1219 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1220 SDValue &Offset) const { 1221 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1222 return false; 1223 1224 bool Imm; 1225 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1226 return false; 1227 1228 return !Imm && isa<ConstantSDNode>(Offset); 1229 } 1230 1231 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1232 SDValue &Offset) const { 1233 bool Imm; 1234 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1235 !isa<ConstantSDNode>(Offset); 1236 } 1237 1238 bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, 1239 SDValue &Base, 1240 SDValue &Offset) const { 1241 SDLoc DL(Index); 1242 1243 if (CurDAG->isBaseWithConstantOffset(Index)) { 1244 SDValue N0 = Index.getOperand(0); 1245 SDValue N1 = Index.getOperand(1); 1246 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1247 1248 // (add n0, c0) 1249 Base = N0; 1250 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); 1251 return true; 1252 } 1253 1254 if (isa<ConstantSDNode>(Index)) 1255 return false; 1256 1257 Base = Index; 1258 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1259 return true; 1260 } 1261 1262 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1263 SDValue Val, uint32_t Offset, 1264 uint32_t Width) { 1265 // Transformation function, pack the offset and width of a BFE into 1266 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1267 // source, bits [5:0] contain the offset and bits [22:16] the width. 1268 uint32_t PackedVal = Offset | (Width << 16); 1269 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1270 1271 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1272 } 1273 1274 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1275 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1276 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1277 // Predicate: 0 < b <= c < 32 1278 1279 const SDValue &Shl = N->getOperand(0); 1280 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1281 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1282 1283 if (B && C) { 1284 uint32_t BVal = B->getZExtValue(); 1285 uint32_t CVal = C->getZExtValue(); 1286 1287 if (0 < BVal && BVal <= CVal && CVal < 32) { 1288 bool Signed = N->getOpcode() == ISD::SRA; 1289 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1290 1291 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, 1292 32 - CVal)); 1293 return; 1294 } 1295 } 1296 SelectCode(N); 1297 } 1298 1299 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1300 switch (N->getOpcode()) { 1301 case ISD::AND: 1302 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1303 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1304 // Predicate: isMask(mask) 1305 const SDValue &Srl = N->getOperand(0); 1306 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1307 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1308 1309 if (Shift && Mask) { 1310 uint32_t ShiftVal = Shift->getZExtValue(); 1311 uint32_t MaskVal = Mask->getZExtValue(); 1312 1313 if (isMask_32(MaskVal)) { 1314 uint32_t WidthVal = countPopulation(MaskVal); 1315 1316 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1317 Srl.getOperand(0), ShiftVal, WidthVal)); 1318 return; 1319 } 1320 } 1321 } 1322 break; 1323 case ISD::SRL: 1324 if (N->getOperand(0).getOpcode() == ISD::AND) { 1325 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1326 // Predicate: isMask(mask >> b) 1327 const SDValue &And = N->getOperand(0); 1328 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1329 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1330 1331 if (Shift && Mask) { 1332 uint32_t ShiftVal = Shift->getZExtValue(); 1333 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1334 1335 if (isMask_32(MaskVal)) { 1336 uint32_t WidthVal = countPopulation(MaskVal); 1337 1338 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1339 And.getOperand(0), ShiftVal, WidthVal)); 1340 return; 1341 } 1342 } 1343 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { 1344 SelectS_BFEFromShifts(N); 1345 return; 1346 } 1347 break; 1348 case ISD::SRA: 1349 if (N->getOperand(0).getOpcode() == ISD::SHL) { 1350 SelectS_BFEFromShifts(N); 1351 return; 1352 } 1353 break; 1354 1355 case ISD::SIGN_EXTEND_INREG: { 1356 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 1357 SDValue Src = N->getOperand(0); 1358 if (Src.getOpcode() != ISD::SRL) 1359 break; 1360 1361 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); 1362 if (!Amt) 1363 break; 1364 1365 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1366 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), 1367 Amt->getZExtValue(), Width)); 1368 return; 1369 } 1370 } 1371 1372 SelectCode(N); 1373 } 1374 1375 bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { 1376 assert(N->getOpcode() == ISD::BRCOND); 1377 if (!N->hasOneUse()) 1378 return false; 1379 1380 SDValue Cond = N->getOperand(1); 1381 if (Cond.getOpcode() == ISD::CopyToReg) 1382 Cond = Cond.getOperand(2); 1383 1384 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) 1385 return false; 1386 1387 MVT VT = Cond.getOperand(0).getSimpleValueType(); 1388 if (VT == MVT::i32) 1389 return true; 1390 1391 if (VT == MVT::i64) { 1392 auto ST = static_cast<const SISubtarget *>(Subtarget); 1393 1394 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 1395 return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); 1396 } 1397 1398 return false; 1399 } 1400 1401 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { 1402 SDValue Cond = N->getOperand(1); 1403 1404 if (isCBranchSCC(N)) { 1405 // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. 1406 SelectCode(N); 1407 return; 1408 } 1409 1410 SDLoc SL(N); 1411 1412 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond); 1413 CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, 1414 N->getOperand(2), // Basic Block 1415 VCC.getValue(0)); 1416 return; 1417 } 1418 1419 // This is here because there isn't a way to use the generated sub0_sub1 as the 1420 // subreg index to EXTRACT_SUBREG in tablegen. 1421 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { 1422 MemSDNode *Mem = cast<MemSDNode>(N); 1423 unsigned AS = Mem->getAddressSpace(); 1424 if (AS == AMDGPUAS::FLAT_ADDRESS) { 1425 SelectCode(N); 1426 return; 1427 } 1428 1429 MVT VT = N->getSimpleValueType(0); 1430 bool Is32 = (VT == MVT::i32); 1431 SDLoc SL(N); 1432 1433 MachineSDNode *CmpSwap = nullptr; 1434 if (Subtarget->hasAddr64()) { 1435 SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC; 1436 1437 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { 1438 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 : 1439 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64; 1440 SDValue CmpVal = Mem->getOperand(2); 1441 1442 // XXX - Do we care about glue operands? 1443 1444 SDValue Ops[] = { 1445 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1446 }; 1447 1448 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1449 } 1450 } 1451 1452 if (!CmpSwap) { 1453 SDValue SRsrc, SOffset, Offset, SLC; 1454 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { 1455 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET : 1456 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET; 1457 1458 SDValue CmpVal = Mem->getOperand(2); 1459 SDValue Ops[] = { 1460 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1461 }; 1462 1463 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1464 } 1465 } 1466 1467 if (!CmpSwap) { 1468 SelectCode(N); 1469 return; 1470 } 1471 1472 MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1); 1473 *MMOs = Mem->getMemOperand(); 1474 CmpSwap->setMemRefs(MMOs, MMOs + 1); 1475 1476 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; 1477 SDValue Extract 1478 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); 1479 1480 ReplaceUses(SDValue(N, 0), Extract); 1481 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); 1482 CurDAG->RemoveDeadNode(N); 1483 } 1484 1485 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1486 SDValue &SrcMods) const { 1487 1488 unsigned Mods = 0; 1489 1490 Src = In; 1491 1492 if (Src.getOpcode() == ISD::FNEG) { 1493 Mods |= SISrcMods::NEG; 1494 Src = Src.getOperand(0); 1495 } 1496 1497 if (Src.getOpcode() == ISD::FABS) { 1498 Mods |= SISrcMods::ABS; 1499 Src = Src.getOperand(0); 1500 } 1501 1502 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1503 1504 return true; 1505 } 1506 1507 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1508 SDValue &SrcMods) const { 1509 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1510 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1511 } 1512 1513 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1514 SDValue &SrcMods, SDValue &Clamp, 1515 SDValue &Omod) const { 1516 SDLoc DL(In); 1517 // FIXME: Handle Clamp and Omod 1518 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1519 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1520 1521 return SelectVOP3Mods(In, Src, SrcMods); 1522 } 1523 1524 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1525 SDValue &SrcMods, SDValue &Clamp, 1526 SDValue &Omod) const { 1527 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1528 1529 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1530 cast<ConstantSDNode>(Clamp)->isNullValue() && 1531 cast<ConstantSDNode>(Omod)->isNullValue(); 1532 } 1533 1534 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1535 SDValue &SrcMods, 1536 SDValue &Omod) const { 1537 // FIXME: Handle Omod 1538 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1539 1540 return SelectVOP3Mods(In, Src, SrcMods); 1541 } 1542 1543 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1544 SDValue &SrcMods, 1545 SDValue &Clamp, 1546 SDValue &Omod) const { 1547 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1548 return SelectVOP3Mods(In, Src, SrcMods); 1549 } 1550 1551 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1552 const AMDGPUTargetLowering& Lowering = 1553 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1554 bool IsModified = false; 1555 do { 1556 IsModified = false; 1557 // Go over all selected nodes and try to fold them a bit more 1558 for (SDNode &Node : CurDAG->allnodes()) { 1559 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1560 if (!MachineNode) 1561 continue; 1562 1563 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1564 if (ResNode != &Node) { 1565 ReplaceUses(&Node, ResNode); 1566 IsModified = true; 1567 } 1568 } 1569 CurDAG->RemoveDeadNodes(); 1570 } while (IsModified); 1571 } 1572