//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU-specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget *Subtarget;

public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();
  bool runOnMachineFunction(MachineFunction &MF) override;
  SDNode *Select(SDNode *N) override;
  const char *getPassName() const override;
  void PostprocessISelDAG() override;

private:
  bool isInlineImmediate(SDNode *N) const;
  bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
                   const R600InstrInfo *TII);
  bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
  bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);

  // Complex pattern selectors
  bool SelectADDRParam(SDValue Addr, SDValue &R1, SDValue &R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  static bool checkType(const Value *Ptr, unsigned int AddrSpace);
  static bool checkPrivateAddress(const MachineMemOperand *Op);

  static bool isGlobalStore(const StoreSDNode *N);
  static bool isFlatStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  bool isCPLoad(const LoadSDNode *N) const;
  bool isConstantLoad(const LoadSDNode *N, int CbId) const;
  bool isGlobalLoad(const LoadSDNode *N) const;
  bool isFlatLoad(const LoadSDNode *N) const;
  bool isParamLoad(const LoadSDNode *N) const;
  bool isPrivateLoad(const LoadSDNode *N) const;
  bool isLocalLoad(const LoadSDNode *N) const;
  bool isRegionLoad(const LoadSDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue &Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &GLC) const;
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
  SDNode *SelectAddrSpaceCast(SDNode *N);
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Omod) const;
  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp, SDValue &Omod) const;

  SDNode *SelectADD_SUB_I64(SDNode *N);
  SDNode *SelectDIV_SCALE(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  SDNode *SelectS_BFEFromShifts(SDNode *N);
  SDNode *SelectS_BFE(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

} // end anonymous namespace

/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
  return new AMDGPUDAGToDAGISel(TM);
}
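
// Illustrative only: a target normally instantiates this pass from its
// TargetPassConfig. A minimal sketch of that hookup (the AMDGPUPassConfig
// method shown here is an assumption, not defined in this file):
//
//   bool AMDGPUPassConfig::addInstSelector() {
//     addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
//     return false;
//   }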

AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM) {}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget());
  return SelectionDAGISel::runOnMachineFunction(MF);
}

AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
  const SITargetLowering *TL
      = static_cast<const SITargetLowering *>(getTargetLowering());
  return TL->analyzeImmediate(N) == 0;
}

/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode())
    return nullptr;

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}

bool AMDGPUDAGToDAGISel::SelectADDRParam(SDValue Addr, SDValue &R1,
                                         SDValue &R2) {
  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  }
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }
  return SelectADDRParam(Addr, R1, R2);
}

bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue &R1, SDValue &R2) {
  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
      Addr.getOpcode() == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Addr.getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    } else {
      R1 = Addr;
      R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
    }
  } else if (Addr.getOpcode() == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
  } else {
    R1 = Addr;
    R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64);
  }
  return true;
}

SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
      !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(),
                 AMDGPUAS::LOCAL_ADDRESS))
    return N;

  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  // Write the max value to m0 before each load operation.
  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N),
                                                           MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

  return N;
}
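
// For illustration: a local (LDS) memory node such as (load<LDS> ch, ptr) is
// morphed in place to carry one extra glue operand, (load<LDS> ch, ptr, glue),
// where the glue comes from a CopyToReg of the immediate -1 into m0. Writing
// all ones into m0 (the register that bounds LDS accesses on these targets)
// makes every in-range offset legal before the access executes.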

SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return nullptr; // Already selected.
  }

  if (isa<AtomicSDNode>(N))
    N = glueCopyToM0(N);

  switch (Opc) {
  default: break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADD:
  case ISD::SUB: {
    if (N->getValueType(0) != MVT::i64 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectADD_SUB_I64(N);
  }
  case ISD::SCALAR_TO_VECTOR:
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::BUILD_VECTOR: {
    unsigned RegClassID;
    const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    EVT EltVT = VT.getVectorElementType();
    assert(EltVT.bitsEq(MVT::i32));
    if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
      bool UseVReg = true;
      for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
           U != E; ++U) {
        if (!U->isMachineOpcode()) {
          continue;
        }
        const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
        if (!RC) {
          continue;
        }
        if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
          UseVReg = false;
        }
      }
      switch (NumVectorElts) {
      case 1:
        RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID
                             : AMDGPU::SReg_32RegClassID;
        break;
      case 2:
        RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID
                             : AMDGPU::SReg_64RegClassID;
        break;
      case 4:
        RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID
                             : AMDGPU::SReg_128RegClassID;
        break;
      case 8:
        RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID
                             : AMDGPU::SReg_256RegClassID;
        break;
      case 16:
        RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID
                             : AMDGPU::SReg_512RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    } else {
      // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG,
      // which adds a 128-bit register copy when going through the
      // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
      // as possible because they cannot be bundled by our scheduler.
      switch (NumVectorElts) {
      case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
      case 4:
        if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
          RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
        else
          RegClassID = AMDGPU::R600_Reg128RegClassID;
        break;
      default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
      }
    }

    SDLoc DL(N);
    SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

    if (NumVectorElts == 1) {
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
                                  N->getOperand(0), RegClass);
    }

    assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                  "supported yet");
    // 16 = Max Num Vector Elements
    // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
    // 1 = Vector Register Class
    SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

    RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
    bool IsRegSeq = true;
    unsigned NOps = N->getNumOperands();
    for (unsigned i = 0; i < NOps; i++) {
      // XXX: Why is this here?
      if (isa<RegisterSDNode>(N->getOperand(i))) {
        IsRegSeq = false;
        break;
      }
      RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32);
    }

    if (NOps != NumVectorElts) {
      // Fill in the missing undef elements if this was a scalar_to_vector.
      assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);

      MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                     DL, EltVT);
      for (unsigned i = NOps; i < NumVectorElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
        RegSeqArgs[1 + (2 * i) + 1] =
            CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL,
                                      MVT::i32);
      }
    }

    if (!IsRegSeq)
      break;
    return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
                                RegSeqArgs);
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
      break;
    }
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm & 0xFFFFFFFF,
                                                            DL, MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                        CurDAG->getConstant(Imm >> 32, DL,
                                                            MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                  N->getValueType(0), Ops);
  }
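
  // Worked example: a non-inline i64 immediate such as 0x0000000100000002 is
  // materialized above as two scalar moves plus a REG_SEQUENCE, roughly
  //   %lo = S_MOV_B32 2          ; Imm & 0xFFFFFFFF -> sub0
  //   %hi = S_MOV_B32 1          ; Imm >> 32        -> sub1
  //   %r  = REG_SEQUENCE SReg_64, %lo, sub0, %hi, sub1
  // (register names are illustrative only).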

  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    SDLoc SL(N);
    EVT VT = N->getValueType(0);

    if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
      N = glueCopyToM0(N);
      break;
    }

    // To simplify the TableGen patterns, we replace all i64 loads with
    // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
    // during DAG legalization; however, some places (e.g. ExpandUnalignedLoad)
    // in the DAG legalizer assume that if the type i64 is legal, then so are
    // i64 loads, so doing this promotion early can cause problems.
    SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
                                      LD->getBasePtr(), LD->getMemOperand());
    SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
                                      MVT::i64, NewLoad);
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
    SDNode *Load = glueCopyToM0(NewLoad.getNode());
    SelectCode(Load);
    N = BitCast.getNode();
    break;
  }

  case ISD::STORE: {
    // Handle i64 stores here for the same reason mentioned above for loads.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Value = ST->getValue();
    if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
      SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
                                         MVT::v2i32, Value);
      SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
                                          ST->getBasePtr(),
                                          ST->getMemOperand());

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);

      if (NewValue.getOpcode() == ISD::BITCAST) {
        Select(NewStore.getNode());
        return SelectCode(NewValue.getNode());
      }

      // getNode() may fold the bitcast if its input was another bitcast. If
      // that happens we should only select the new store.
      N = NewStore.getNode();
    }

    N = glueCopyToM0(N);
    break;
  }
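
  // For illustration: the rewrites above turn (i64 (load ptr)) into
  // (i64 (bitcast (v2i32 (load ptr)))), and an i64 store into a v2i32 store
  // of a bitcast value, so only the v2i32 memory patterns need to exist in
  // TableGen.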

  case AMDGPUISD::REGISTER_LOAD: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;

    SDLoc DL(N);
    SelectADDRIndirect(N->getOperand(1), Addr, Offset);
    const SDValue Ops[] = {
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
                                  CurDAG->getVTList(MVT::i32, MVT::i64,
                                                    MVT::Other),
                                  Ops);
  }
  case AMDGPUISD::REGISTER_STORE: {
    if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      break;
    SDValue Addr, Offset;
    SelectADDRIndirect(N->getOperand(2), Addr, Offset);
    SDLoc DL(N);
    const SDValue Ops[] = {
      N->getOperand(1),
      Addr,
      Offset,
      CurDAG->getTargetConstant(0, DL, MVT::i32),
      N->getOperand(0),
    };
    return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
                                  CurDAG->getVTList(MVT::Other),
                                  Ops);
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    // There is a scalar version available, but unlike the vector version,
    // which has a separate operand for the offset and width, the scalar
    // version packs the width and offset into a single operand. Try to move
    // to the scalar version if the offsets are constant, so that we can try
    // to keep extended loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
                    N->getOperand(0), OffsetVal, WidthVal);
  }
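
  // Worked example: BFE_U32 with offset = 8 and width = 5 becomes S_BFE_U32
  // with the packed source operand (5 << 16) | 8 = 0x00050008; see getS_BFE()
  // below for the encoding (bits [5:0] offset, bits [22:16] width).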
  case AMDGPUISD::DIV_SCALE: {
    return SelectDIV_SCALE(N);
  }
  case ISD::CopyToReg: {
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());
    Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::ADDRSPACECAST:
    return SelectAddrSpaceCast(N);
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
    if (N->getValueType(0) != MVT::i32 ||
        Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
      break;

    return SelectS_BFE(N);
  }

  return SelectCode(N);
}

bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
  assert(AS != 0 && "Use checkPrivateAddress instead.");
  if (!Ptr)
    return false;

  return Ptr->getType()->getPointerAddressSpace() == AS;
}

bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
  if (Op->getPseudoValue())
    return true;

  if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
    return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;

  return false;
}

bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
  const Value *MemVal = N->getMemOperand()->getValue();
  return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
          !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}

bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
  const Value *MemVal = N->getMemOperand()->getValue();
  if (CbId == -1)
    return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);

  return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}

bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
    if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
        N->getMemoryVT().bitsLT(MVT::i32))
      return true;

  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
  return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}

bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
  MachineMemOperand *MMO = N->getMemOperand();
  if (checkPrivateAddress(N->getMemOperand())) {
    if (MMO) {
      const PseudoSourceValue *PSV = MMO->getPseudoValue();
      if (PSV && PSV->isConstantPool()) {
        return true;
      }
    }
  }
  return false;
}

bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
  if (checkPrivateAddress(N->getMemOperand())) {
    // Check to make sure we are not a constant pool load or a constant load
    // that is marked as a private load.
    if (isCPLoad(N) || isConstantLoad(N, -1)) {
      return false;
    }
  }

  const Value *MemVal = N->getMemOperand()->getValue();
  if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
      !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
    return true;
  }
  return false;
}
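
// For illustration: these predicates are referenced from TableGen load/store
// PatFrags. A minimal sketch of that usage (the exact PatFrag definition is
// an assumption here, not part of this file):
//
//   def global_store : PatFrag<(ops node:$val, node:$ptr),
//                              (store node:$val, node:$ptr), [{
//     return isGlobalStore(cast<StoreSDNode>(N));
//   }]>;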

const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

#ifdef DEBUGTMP
#undef INT64_C
#endif
#undef DEBUGTMP

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                         SDValue &IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                         SDValue &BaseReg,
                                                         SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  AMDGPU::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  bool IsAdd = (N->getOpcode() == ISD::ADD);

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
  SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
  SDValue Carry(AddLo, 1);
  SDNode *AddHi
      = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
                               SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);

  SDValue Args[5] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}

// We need to handle this here because TableGen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
  return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an
  // offset don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = N1;
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = Addr.getOperand(0);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = Addr;
      return true;
    }
  }

  // Default case.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
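
// Worked example: for a DS access at (add %ptr, 92), SelectDS1Addr1Offset
// returns Base = %ptr and Offset = 92, since 92 fits in the 16-bit DS offset
// field; for a bare constant address such as 400 it instead materializes a
// zero base (V_MOV_B32 0) and uses 400 as the offset, so several accesses can
// share one base register.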

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          MachineSDNode *MachineSub
              = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
                                       Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
          = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                   DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // Default case.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}

static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
  return isUInt<12>(Imm->getZExtValue());
}

void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  SDLoc DL(Addr);

  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (isLegalMUBUFImmOffset(C1)) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return;
    } else if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                            CurDAG->getTargetConstant(C1->getZExtValue(), DL,
                                                      MVT::i32)),
                        0);
      return;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return;
  }

  // Default case -> offset.
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
}
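
// Worked example: an address of the form (add (add %base, %voff), 64) is
// split above into Ptr = %base (later folded into the resource descriptor),
// VAddr = %voff with addr64 = 1, and Offset = 64, because 64 fits in the
// 12-bit MUBUF immediate offset; a constant offset that does not fit is moved
// into soffset via S_MOV_B32 instead.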

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
                                            SDValue &VAddr, SDValue &SOffset,
                                            SDValue &ImmOffset) const {
  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

  unsigned ScratchOffsetReg =
      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
  Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
                                ScratchOffsetReg, MVT::i32);
  SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
  SDValue ScratchRsrcDword0 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);

  SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
  SDValue ScratchRsrcDword1 =
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);

  const SDValue RsrcOps[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    ScratchRsrcDword0,
    CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
    ScratchRsrcDword1,
    CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
  };
  SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                      MVT::v2i32, RsrcOps), 0);
  Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
  SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
      MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);

  // (add n0, c1)
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    // Offsets in vaddr must be positive.
    if (CurDAG->SignBitIsZero(N0)) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
      if (isLegalMUBUFImmOffset(C1)) {
        VAddr = N0;
        ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
        return true;
      }
    }
  }

  // (node)
  VAddr = Addr;
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
              GLC, SLC, TFE);

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &GLC) const {
  SDValue SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

///
/// \param EncodedOffset This is the immediate value that will be encoded
///        directly into the instruction. On SI/CI the \p EncodedOffset
///        will be in units of dwords and on VI+ it will be in units of bytes.
static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
                                 int64_t EncodedOffset) {
  return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
      ByteOffset >> 2 : ByteOffset;

  if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = N0;
      return true;
    }
  }
  SBase = Addr;
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
                                              SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}
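
// Worked example: a byte offset of 400 from an SMRD base pointer is encoded
// as 400 >> 2 = 100 dwords on SI/CI (legal since it fits in 8 bits), while on
// VI+ the byte offset 400 is encoded directly (legal since it fits in 20
// bits); offsets that do not fit are materialized with S_MOV_B32 and passed
// in a register instead.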

// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
  AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
  SDLoc DL(N);

  assert(Subtarget->hasFlatAddressSpace() &&
         "addrspacecast only supported with flat address space!");

  assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
          ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
         "Cannot cast address space to / from constant address!");

  assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
          ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
         "Can only cast to / from flat address space!");

  // The flat instructions read the address from the VGPR holding it, so
  // casting should just reinterpret the base VGPR; insert a trunc, bitcast,
  // or zext as appropriate.
  SDValue Src = ASC->getOperand(0);
  EVT DestVT = ASC->getValueType(0);
  EVT SrcVT = Src.getValueType();

  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned DestSize = DestVT.getSizeInBits();

  if (SrcSize > DestSize) {
    assert(SrcSize == 64 && DestSize == 32);
    return CurDAG->getMachineNode(
        TargetOpcode::EXTRACT_SUBREG,
        DL,
        DestVT,
        Src,
        CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
  }

  if (DestSize > SrcSize) {
    assert(SrcSize == 32 && DestSize == 64);

    // FIXME: This is probably wrong, we should never be defining
    // a register class with both VGPRs and SGPRs.
    SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL,
                                           MVT::i32);

    const SDValue Ops[] = {
      RC,
      Src,
      CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                     CurDAG->getConstant(0, DL, MVT::i32)), 0),
      CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                  DL, N->getValueType(0), Ops);
  }

  assert(SrcSize == 64 && DestSize == 64);
  return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}

SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
                                     uint32_t Offset, uint32_t Width) {
  // Transformation function, pack the offset and width of a BFE into
  // the format expected by S_BFE_I32 / S_BFE_U32. In the second source
  // operand, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c - b), (32 - c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c - b), (32 - c)"
  // Predicate: 0 < b <= c < 32
  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
                      CVal - BVal, 32 - CVal);
    }
  }
  return SelectCode(N);
}

SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
                          ShiftVal, WidthVal);
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL)
      return SelectS_BFEFromShifts(N);
    break;
  }

  return SelectCode(N);
}
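
// Worked example: for ((x << 8) >> 20), with b = 8 and c = 20, the predicate
// 0 < 8 <= 20 < 32 holds, so SelectS_BFEFromShifts emits
// S_BFE_U32 x, offset = 20 - 8 = 12, width = 32 - 20 = 12; similarly,
// ((x >> 3) & 0xff) becomes S_BFE_U32 x, offset = 3, width = 8.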

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods = 0;

  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
                                          SDValue &SrcMods) const {
  bool Res = SelectVOP3Mods(In, Src, SrcMods);
  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  // FIXME: Handle Clamp and Omod.
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
                                           SDValue &SrcMods, SDValue &Clamp,
                                           SDValue &Omod) const {
  bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

  return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
         cast<ConstantSDNode>(Clamp)->isNullValue() &&
         cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Omod) const {
  // FIXME: Handle Omod.
  Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering &Lowering =
      *static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Go over all selected nodes and try to fold them a bit more.
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != &Node) {
        ReplaceUses(&Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}