//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

namespace llvm {
class R600InstrInfo;
}

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

static bool isCBranchSCC(const SDNode *N) {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);
  return Cond.getOpcode() == ISD::SETCC &&
         Cond.getOperand(0).getValueType() == MVT::i32 &&
         Cond.hasOneUse();
}

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned int AddrSpace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const MemSDNode *N);
  static bool isFlatStore(const MemSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const MemSDNode *N, int CbId) const;
  bool isGlobalLoad(const MemSDNode *N) const;
  bool isFlatLoad(const MemSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  void SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);
  SDNode *SelectBRCOND(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

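// An operand is an inline immediate when the hardware can encode it directly
// in the instruction word for free; on SI this covers the integers -16..64
// and a small set of floating-point constants (0.0, +/-0.5, +/-1.0, +/-2.0,
// +/-4.0). analyzeImmediate returns 0 exactly for those values.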
bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(SDValue Addr, SDValue &R1,
                                         SDValue &R2) {
  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

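// DS (local memory) instructions implicitly read M0, which holds the size
// bound for local-address accesses. Before selecting such an operation, write
// the maximum value (-1, i.e. no limit) to M0 and glue the copy to the memory
// node so the two cannot be scheduled apart.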
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation

  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}

static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they can't be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                    MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                            MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
  case ISD::LOAD:
  case ISD::STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move to
    // the scalar version if the offsets are constant, so that we can try to
    // keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  case ISD::BRCOND:
    return SelectBRCOND(N);
  }

  return SelectCode(N);
}

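// The predicates below classify memory nodes by the IR address space of the
// pointer they access; the TableGen patterns use them to choose between the
// different load/store instruction families.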
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
  if (!N->writeMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
  if (!N->readMem())
    return false;
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
         !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

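// There is no scalar 64-bit add instruction, so a 64-bit ADD/SUB is split
// into 32-bit halves: S_ADD_U32 / S_SUB_U32 produce the low half and a carry
// in SCC (modeled here as a glue result), S_ADDC_U32 / S_SUBB_U32 consume it
// for the high half, and a REG_SEQUENCE reassembles the i64 result.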
SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
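// Matches the addressing of ds_read2_b32 / ds_write2_b32, which access two
// adjacent dwords from one base register using two independent 8-bit offsets
// counted in dword (4-byte) units.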
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

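// MUBUF addressing combines a 128-bit resource descriptor (SRsrc), an
// optional VGPR address (selected by the Offen / Idxen / Addr64 bits), an
// SGPR soffset, and a 12-bit unsigned immediate offset. The selectors below
// decompose a pointer expression into those fields.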
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                          CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // addr64 bit was removed for volcanic islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

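// Scratch (private) accesses also go through the buffer unit: the resource
// descriptor and per-wave base offset are registers set up in the function
// prologue, and the pointer value itself becomes the VGPR offset.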
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isLegalMUBUFImmOffset(C1)) {
      VAddr = N0;
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset,
                                           SDValue &Offset) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

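// Split a constant buffer offset into the 12-bit immediate field and an
// SOffset remainder, e.g. 5000 becomes SOffset = 4095 (materialized once and
// reusable by neighboring loads) plus ImmOffset = 905.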
void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm >= 4096) {
    if (Imm <= 4095 + 64) {
      // Use an SOffset inline constant for 1..64
      Overflow = Imm - 4095;
      Imm = 4095;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits set into SOffset, so that a larger
      // range of values can be covered using s_movk_i32.
      uint32_t High = (Imm + 1) & ~4095;
      uint32_t Low = (Imm + 1) & 4095;
      Imm = Low;
      Overflow = High - 1;
    }
  }

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                        CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  SelectMUBUFConstant(Offset, SOffset, ImmOffset);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset))
    return false;

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    SelectMUBUFConstant(N1, SOffset, ImmOffset);
    VOffset = N0;
  } else {
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    VOffset = Offset;
  }

  return true;
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
         isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

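// SMRD offsets: SI/CI encode an 8-bit immediate counted in dwords, CI can
// also take a full 32-bit literal (the Imm32 variants below), and VI+ encode
// a 20-bit byte offset. Anything that does not fit is materialized into an
// SGPR with S_MOV_B32.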
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
                          ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {

  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  DiagnosticInfoUnsupported NotImplemented(
      *MF.getFunction(), "addrspacecast not implemented", DL.getDebugLoc());
  CurDAG->getContext()->diagnose(NotImplemented);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address as the index of the VGPR holding the
  // address, so casting should just be reinterpreting the base VGPR, so just
  // insert trunc / bitcast / zext.
  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

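// For example, "((x << 8) srl 24)" keeps bits [23:16] of x, which is
// "BFE_U32 x, 16, 8": offset = c - b = 24 - 8, width = 32 - c = 8.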
SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  return SelectCode(N);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "((a srl b) & mask)" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}

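// Branches whose condition is a one-use 32-bit scalar compare can use the
// scalar branch S_CBRANCH_SCC*; everything else carries its condition in a
// 64-bit lane mask and must be lowered to a branch on VCC.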
SDNode *AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (isCBranchSCC(N)) {
    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
    return SelectCode(N);
  }

  // The result of VOPC instructions is or'd against ~EXEC before it is
  // written to vcc or another SGPR. This means that the value '1' is always
  // written to the corresponding bit for results that are masked. In order
  // to correctly check against vccz, we need to and VCC with the EXEC
  // register in order to clear the value from the masked bits.

  SDLoc SL(N);

  SDNode *MaskedCond =
      CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                             CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                             Cond);
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
                                     SDValue(MaskedCond, 0),
                                     SDValue()); // Passing SDValue() adds a
                                                 // glue output.
  return CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
                              N->getOperand(2), // Basic Block
                              VCC.getValue(0),  // Chain
                              VCC.getValue(1)); // Glue
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {

  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

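// PreprocessISelDAG performs two DAG rewrites before selection: frame-index
// values used as data (rather than as the pointer operand) are copied into a
// V_MOV_B32 so each memory instruction sees at most one frame index, and
// plain i64 loads/stores are rewritten as v2i32 plus a bitcast to keep the
// TableGen patterns simple.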
void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
  bool Modified = false;

  MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();

  // Handle the perverse case where a frame index is being stored. We don't
  // want to see multiple frame index operands on the same instruction since
  // it complicates things and violates some assumptions about frame index
  // lowering.
  for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
       I != E; ++I) {
    SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);

    // It's possible that we have a frame index defined in the function that
    // isn't used in this block.
    if (FI.use_empty())
      continue;

    // Skip over the AssertZext inserted during lowering.
    SDValue EffectiveFI = FI;
    auto It = FI->use_begin();
    if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
      EffectiveFI = SDValue(*It, 0);
      It = EffectiveFI->use_begin();
    }

    for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
      SDUse &Use = It.getUse();
      SDNode *User = Use.getUser();
      unsigned OpIdx = It.getOperandNo();
      ++It;

      if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
        unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
        if (OpIdx == PtrIdx)
          continue;

        unsigned OpN = M->getNumOperands();
        SDValue NewOps[8];

        assert(OpN < array_lengthof(NewOps));
        for (unsigned Op = 0; Op != OpN; ++Op) {
          if (Op != OpIdx) {
            NewOps[Op] = M->getOperand(Op);
            continue;
          }

          MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      SDLoc(M), MVT::i32, FI);
          NewOps[Op] = SDValue(Mov, 0);
        }

        CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
        Modified = true;
      }
    }
  }

  // XXX - Other targets seem to be able to do this without a worklist.
  SmallVector<LoadSDNode *, 8> LoadsToReplace;
  SmallVector<StoreSDNode *, 8> StoresToReplace;

  for (SDNode &Node : CurDAG->allnodes()) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
      EVT VT = LD->getValueType(0);
      if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
        continue;

      // To simplify the TableGen patterns, we replace all i64 loads with
      // v2i32 loads. We could instead promote i64 loads to v2i32 during DAG
      // legalization, but some places in the legalizer (ExpandUnalignedLoad)
      // assume that i64 loads remain legal whenever i64 is a legal type, so
      // doing the promotion that early can cause problems.
      LoadsToReplace.push_back(LD);
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
      // Handle i64 stores here for the same reason mentioned above for loads.
      SDValue Value = ST->getValue();
      if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
        continue;
      StoresToReplace.push_back(ST);
    }
  }

  for (LoadSDNode *LD : LoadsToReplace) {
    SDLoc SL(LD);

    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
    Modified = true;
  }

  for (StoreSDNode *ST : StoresToReplace) {
    SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
                                       MVT::v2i32, ST->getValue());
    const SDValue StoreOps[] = {
      ST->getChain(),
      NewValue,
      ST->getBasePtr(),
      ST->getOffset()
    };

    CurDAG->UpdateNodeOperands(ST, StoreOps);
    Modified = true;
  }

  // XXX - Is this necessary?
  if (Modified)
    CurDAG->RemoveDeadNodes();
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
      *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}