//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

namespace llvm {
class R600InstrInfo;
}

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isCBranchSCC(const SDNode *N) {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);
  return Cond.getOpcode() == ISD::SETCC &&
         Cond.getOperand(0).getValueType() == MVT::i32 &&
         Cond.hasOneUse();
}

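// For illustration, the shape accepted above is a branch such as
//   brcond (setcc i32 %a, %b, seteq), %bb
// where the setcc result may additionally be routed through a CopyToReg
// (in which case the condition is that node's value operand).
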
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *ptr, unsigned int addrspace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  void SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue &R1, SDValue &R2) {

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

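// Roughly, glueCopyToM0 above rewrites a local-memory access as:
//   t0: ch,glue = <copy of -1 into m0>
//   tN: ...     = <original mem op> ..., t0:1
// i.e. the node is re-created with the copy's glue value as an extra
// operand, keeping the m0 write adjacent to the access.
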
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
      // that adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                           RegClass);
      return;
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
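      // e.g. (v4i32 scalar_to_vector %x) has a single operand, so the three
      // missing lanes are all filled with one shared IMPLICIT_DEF.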
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
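    // e.g. (BFE_U32 %arg, 8, 16) with constant offset/width becomes
    // (S_BFE_U32 %arg, 0x100008), keeping the extract on the scalar unit
    // instead of forcing the value into VGPRs for v_bfe_u32.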
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  }

  SelectCode(N);
}

bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      return !isa<GlobalValue>(
          GetUnderlyingObject(N->getMemOperand()->getValue(),
                              CurDAG->getDataLayout()));

    // TODO: Why do we need this?
    if (N->getMemoryVT().bitsLT(MVT::i32))
      return true;
  }

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

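// e.g. an IR terminator annotated as
//   br i1 %cc, label %then, label %else, !amdgpu.uniform !0
// is reported as uniform by isUniformBr above.
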
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

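// e.g. (i64 add %x, %y) expands above to:
//   %lo = S_ADD_U32  %x.sub0, %y.sub0   ; also defines the carry (glue)
//   %hi = S_ADDC_U32 %x.sub1, %y.sub1   ; consumes the carry
//   REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
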
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
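// Illustration for the selector below: a constant address of 40 becomes
// Base = (V_MOV_B32_e32 0) with Offset0 = 10 and Offset1 = 11 (dword
// offsets), which is the form ds_read2_b32 / ds_write2_b32 expect.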
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

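// Rough mapping for the MUBUF selector below:
//   (add (add %p, %v), 100) -> addr64: Ptr = %p, VAddr = %v, Offset = 100
//   (add %p, 8192)          -> offset: Ptr = %p, Offset = 0 and
//                              SOffset = (S_MOV_B32 8192), since 8192 does
//                              not fit the 12-bit immediate offset.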
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64.
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32.
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);
}

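// Worked examples for the split above:
//   Imm = 4100: 4100 <= 4095 + 64, so ImmOffset = 4095 and SOffset = 5
//               (an inline constant); 4095 + 5 == 4100.
//   Imm = 5000: High = 4096, Low = 905, so ImmOffset = 905 and
//               SOffset = (S_MOV_B32 4095); 905 + 4095 == 5000.
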
bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  SelectMUBUFConstant(Offset, SOffset, ImmOffset);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset))
    return false;

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    SelectMUBUFConstant(N1, SOffset, ImmOffset);
    VOffset = N0;
  } else {
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    VOffset = Offset;
  }

  return true;
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
/// directly into the instruction. On SI/CI the \p EncodedOffset
/// will be in units of dwords and on VI+ it will be units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

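// e.g. for the SMRD selectors below, a byte offset of 400 is encoded as
// 100 (dwords) on SI/CI, which fits the 8-bit immediate field; on VI+ it
// is encoded as 400 (bytes) against the 20-bit field.
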
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

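// e.g. for the shift pairs handled below, (srl (shl %x, 8), 16) has
// b = 8 and c = 16 and becomes (S_BFE_U32 %x, offset = c - b = 8,
// width = 32 - c = 16), i.e. an extract of bits [23:8] of %x.
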
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

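// e.g. a divergent "brcond %cc, %bb" is selected below as:
//   %masked = S_AND_B64 exec, %cc
//   $vcc    = CopyToReg %masked      ; glued to the branch
//   S_CBRANCH_VCCNZ %bb
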
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    SelectCode(N);
    return;
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR. This means that the value '1' is always
  // written to the corresponding bit for results that are masked. In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
      CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                             CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                             Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0),  // Chain
                       VCC.getValue(1)); // Glue
  return;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

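// e.g. SelectVOP3Mods above selects (fneg (fabs %x)) as Src = %x with
// SrcMods = NEG | ABS, while a plain %x yields Src = %x and SrcMods = 0.
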
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      It = EffectiveFI->use_begin();
    }

    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
      }
    }
  }
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}