//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

namespace llvm {
class R600InstrInfo;
}

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// \brief Return true if \p N is a BRCOND whose condition is a single-use
/// i32 SETCC (possibly routed through a CopyToReg), i.e. a branch that can
/// be selected as a scalar compare-and-branch on SCC.
static bool isCBranchSCC(const SDNode *N) {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  // Operand 0 is the chain; operand 1 is the branch condition.
  SDValue Cond = N->getOperand(1);
  // Look through a CopyToReg to the value actually being copied.
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);
  return Cond.getOpcode() == ISD::SETCC &&
         Cond.getOperand(0).getValueType() == MVT::i32 &&
         Cond.hasOneUse();
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *SelectImpl(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  // R600 operand-folding helpers.
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  // Address-space classification helpers used by the load/store predicates
  // below.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (local memory) addressing.
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;

  // MUBUF (buffer memory) addressing.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  // SMRD (scalar memory read) addressing.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;

  // VOP3 source modifier (neg/abs/clamp/omod) selectors.
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  // Custom node selection for cases tablegen cannot express.
  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into a AMDGPU-specific
// DAG, ready for instruction scheduling.
/// \brief Create the AMDGPU instruction-selection pass.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
  : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget for this function before running selection.
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

/// \brief Return true if \p N can be encoded as an inline immediate
/// (analyzeImmediate reports no extra encoding cost).
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    // Skip over the defs; SDNode operands only cover the uses.
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // Operand 0 of REG_SEQUENCE is the register class ID; the remaining
    // operands come in (value, subreg-index) pairs.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

/// \brief Split \p Addr into a base (\p R1) and offset (\p R2) pair.
/// Always succeeds; falls back to (Addr, 0) when no split applies.
bool AMDGPUDAGToDAGISel::SelectADDRParam(
  SDValue Addr, SDValue& R1, SDValue& R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  // Symbolic addresses cannot be split into base + offset here.
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}


/// 64-bit variant of SelectADDR: same structure, i64 frame index / constants.
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

/// \brief For local-memory accesses on SI+, glue a CopyToReg of -1 into m0
/// onto \p N (via MorphNodeTo, mutating the node in place).
/// \returns \p N, unchanged for non-local accesses or pre-SI subtargets.
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector <SDValue, 8> Ops;
  // Re-emit the original operands with the glue appended at the end.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

/// \brief Map an element count to the matching SGPR vector register class ID.
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

/// \brief Main custom-selection entry point; handles the node kinds tablegen
/// cannot express and defers everything else to the generated SelectCode.
SDNode *AMDGPUDAGToDAGISel::SelectImpl(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr;   // Already selected.
  }

  // Atomic operations on local memory need m0 initialized (see glueCopyToM0).
  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128 bits reg copy when going through TwoAddressInstructions
      // pass. We want to avoid 128 bits copies as much as possible because they
      // can't be bundled by our scheduler.
      switch(NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    // A single-element vector is just a copy into the right register class.
    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Materialize 64-bit non-inline immediates as two S_MOV_B32 halves
    // combined with a REG_SEQUENCE.
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    // Local-memory accesses need m0 set up before selection proceeds.
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  return SelectCode(N);
}

/// \brief Return true if \p Ptr points into address space \p AS.
/// \p AS must not be the private (0) address space; use checkPrivateAddress.
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

/// \brief Return true if \p Op accesses a private (scratch) address.
/// Pseudo-source values (stack, constant pool, etc.) count as private.
bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  // Private is the fallback: anything not local, global, or region.
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

/// \brief Return true if \p N loads from the constant address space, or from
/// constant buffer \p CbId when \p CbId is non-negative.
bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  // Constant-address loads are also treated as global on pre-SI subtargets
  // or when the access is narrower than 32 bits — NOTE(review): presumably
  // because scalar (SMRD) loads cannot handle those cases; confirm.
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

/// \brief Return true if \p N is a load from the constant pool.
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  // Private is the fallback: anything not in a known named address space.
  const Value *MemVal = N->getMemOperand()->getValue();
  return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
}

/// \brief Return true if the current block's terminator was marked uniform
/// by an earlier analysis (amdgpu.uniform / structurizecfg.uniform metadata).
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    // Byte offset converted to a dword offset (divide by 4).
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue& BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

/// \brief Match a vertex-fetch address as base + 16-bit immediate offset.
/// Always succeeds; defaults to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

/// \brief Match an indirect address as base register + constant offset.
/// Always succeeds; defaults to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

/// \brief Select a 64-bit add/sub as a 32-bit add/sub pair with carry
/// (S_ADD_U32/S_ADDC_U32 or S_SUB_U32/S_SUBB_U32), recombined via
/// REG_SEQUENCE.
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  // Second result is the glued carry out of the low half.
  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };


  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
    = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                             SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

/// \brief Return true if \p Offset fits in \p OffsetBits bits and is usable
/// as a DS instruction offset for \p Base on this subtarget.
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instruction with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

/// \brief Match a DS address as base + 16-bit unsigned immediate offset.
/// Always succeeds; defaults to (Addr, 0).
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
/// \brief Match a 64-bit, 4-byte-aligned DS address for read2/write2 as
/// (base, dword offset0, dword offset1) where each offset is an 8-bit
/// dword-granular immediate and offset1 == offset0 + 1.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    // Offsets are encoded in dwords, not bytes.
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

/// MUBUF immediate offsets are 12-bit unsigned.
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

/// \brief Decompose \p Addr into the full set of MUBUF addressing operands
/// (ptr, vaddr, soffset, offset and the offen/idxen/addr64/glc/slc/tfe
/// flags). Fails only when the subtarget prefers flat instructions for
/// global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instruction
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // GLC/SLC may already have been selected by the caller; only default them
  // when unset.
  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {

      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

/// \brief Match a MUBUF addr64-mode address; produces the wrapped 128-bit
/// resource descriptor in \p SRsrc. Fails if addr64 does not apply or the
/// subtarget no longer supports the addr64 bit.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

/// Convenience overload that discards GLC/TFE and forces SLC to 0.
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

/// \brief Match a scratch (private) address: rsrc and soffset come from the
/// machine function's reserved scratch registers; the address is split into
/// vaddr plus a legal immediate offset when possible. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

/// \brief Match a MUBUF offset-mode address (no vaddr): requires that
/// SelectMUBUF produced neither offen, idxen nor addr64, and builds a full
/// resource descriptor from the pointer.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    // Descriptor = default data format | maximum size.
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

/// Convenience overload that discards GLC/SLC/TFE.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
/// Convenience overload that exposes GLC but discards SLC/TFE.
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

/// \brief Split a constant buffer offset into a 12-bit immediate (ImmOffset)
/// plus a remainder carried in SOffset, keeping SOffset values reusable
/// across adjacent loads where possible.
void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  // Overflows up to 64 are SGPR inline constants; anything larger needs a
  // materializing s_mov_b32.
  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);
}

/// \brief Match a constant offset operand of a buffer intrinsic; fails for
/// non-constant offsets.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  SelectMUBUFConstant(Offset, SOffset, ImmOffset);

  return true;
}

/// \brief Match a non-constant buffer-intrinsic offset as
/// (soffset, imm offset, voffset); the constant part of an add is folded
/// into soffset/imm via SelectMUBUFConstant.
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset))
    return false;

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    SelectMUBUFConstant(N1, SOffset, ImmOffset);
    VOffset = N0;
  } else {
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    VOffset = Offset;
  }

  return true;
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
    isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

/// \brief Match a constant SMRD byte offset. \p Imm is set to true when the
/// offset is encoded as an instruction immediate, false when it had to be
/// materialized into a register (or emitted as a CI 32-bit literal).
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  // SI/CI encode SMRD offsets in dwords; VI+ in bytes.
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    // Materialize the byte offset into an SGPR.
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

/// \brief Match an SMRD address as (sgpr base, offset). Always succeeds;
/// falls back to \p Addr with a zero immediate offset.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

/// Match SMRD with an encodable immediate offset only.
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

/// Match SMRD with a CI-only 32-bit literal offset.
bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  // Non-immediate but still a constant means the 32-bit literal form.
  return !Imm && isa<ConstantSDNode>(Offset);
}

/// Match SMRD with the offset in an SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

/// Match a bare SMRD buffer offset as an instruction immediate.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

/// Match a bare SMRD buffer offset as a CI-only 32-bit literal.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

/// Match a bare SMRD buffer offset held in an SGPR.
bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

/// \brief Build an S_BFE_{I,U}32 machine node extracting \p Width bits
/// starting at \p Offset from \p Val.
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

/// \brief Fold (shl a, b) followed by a right shift into a single BFE.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
  // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // SRA extracts a signed field, SRL an unsigned one.
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  // No fold applies; use the tablegen-generated matcher.
  return SelectCode(N);
}

/// \brief Recognize shift/mask/sign-extend patterns that map onto the
/// scalar bitfield-extract instructions.
SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    return getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                    Amt->getZExtValue(), Width);
  }
  }

  return SelectCode(N);
}

/// \brief Select a BRCOND. SCC-based branches are left to tablegen; VCC-based
/// branches get the condition masked with EXEC and copied into VCC first.
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR. This means that the value '1' is always
  // written to the corresponding bit for results that are masked. In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
        CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                               CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                               Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}

/// \brief Peel FNEG/FABS off \p In, returning the bare source in \p Src and
/// the corresponding NEG/ABS modifier bits in \p SrcMods. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  // Note the order: fneg(fabs(x)) is matched, fabs(fneg(x)) is not.
  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

/// Succeeds only when no source modifiers were matched.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

/// Like SelectVOP3Mods but also emits (currently always-zero) clamp/omod.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Succeeds only when no modifiers, clamp or omod were matched.
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
                cast<ConstantSDNode>(Clamp)->isNullValue() &&
                cast<ConstantSDNode>(Omod)->isNullValue();
}

/// Source modifiers plus an (always-zero) omod, for ops with builtin clamp.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

/// Source modifiers with clamp and omod both forced to zero.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

/// \brief Pre-selection DAG fixup: replace non-pointer frame-index operands
/// of memory nodes with an explicit v_mov_b32 of the frame index.
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      It = EffectiveFI->use_begin();
    }

    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      // Advance before UpdateNodeOperands can invalidate this use.
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        // The pointer operand itself is allowed to be a frame index; only
        // rewrite uses in other operand positions (e.g. a stored value).
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
      }
    }
  }
}

/// \brief Post-selection peephole: repeatedly run target PostISelFolding
/// over all machine nodes until no more changes are made.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}