1 //===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "AMDGPUInstrInfo.h" 15 #include "AMDGPUISelLowering.h" // For AMDGPUISD 16 #include "AMDGPURegisterInfo.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600InstrInfo.h" 19 #include "SIDefines.h" 20 #include "SIISelLowering.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/FunctionLoweringInfo.h" 23 #include "llvm/CodeGen/PseudoSourceValue.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/SelectionDAG.h" 27 #include "llvm/CodeGen/SelectionDAGISel.h" 28 #include "llvm/IR/Function.h" 29 30 using namespace llvm; 31 32 //===----------------------------------------------------------------------===// 33 // Instruction Selector Implementation 34 //===----------------------------------------------------------------------===// 35 36 namespace { 37 /// AMDGPU specific code to select AMDGPU machine instructions for 38 /// SelectionDAG operations. 39 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 40 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 41 // make the right decision when generating code for different targets. 42 const AMDGPUSubtarget *Subtarget; 43 public: 44 AMDGPUDAGToDAGISel(TargetMachine &TM); 45 virtual ~AMDGPUDAGToDAGISel(); 46 bool runOnMachineFunction(MachineFunction &MF) override; 47 SDNode *Select(SDNode *N) override; 48 const char *getPassName() const override; 49 void PostprocessISelDAG() override; 50 51 private: 52 bool isInlineImmediate(SDNode *N) const; 53 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 54 const R600InstrInfo *TII); 55 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 56 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 57 58 // Complex pattern selectors 59 bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); 60 bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); 61 bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); 62 63 static bool checkType(const Value *ptr, unsigned int addrspace); 64 static bool checkPrivateAddress(const MachineMemOperand *Op); 65 66 static bool isGlobalStore(const StoreSDNode *N); 67 static bool isFlatStore(const StoreSDNode *N); 68 static bool isPrivateStore(const StoreSDNode *N); 69 static bool isLocalStore(const StoreSDNode *N); 70 static bool isRegionStore(const StoreSDNode *N); 71 72 bool isCPLoad(const LoadSDNode *N) const; 73 bool isConstantLoad(const LoadSDNode *N, int cbID) const; 74 bool isGlobalLoad(const LoadSDNode *N) const; 75 bool isFlatLoad(const LoadSDNode *N) const; 76 bool isParamLoad(const LoadSDNode *N) const; 77 bool isPrivateLoad(const LoadSDNode *N) const; 78 bool isLocalLoad(const LoadSDNode *N) const; 79 bool isRegionLoad(const LoadSDNode *N) const; 80 81 SDNode *glueCopyToM0(SDNode *N) const; 82 83 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 84 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 85 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 86 SDValue& Offset); 87 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 88 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 89 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 90 unsigned OffsetBits) const; 91 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 92 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 93 SDValue &Offset1) const; 94 void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 95 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 96 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 97 SDValue &TFE) const; 98 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 99 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 100 SDValue &SLC, SDValue &TFE) const; 101 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 102 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 103 SDValue &SLC) const; 104 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 105 SDValue &SOffset, SDValue &ImmOffset) const; 106 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 107 SDValue &Offset, SDValue &GLC, SDValue &SLC, 108 SDValue &TFE) const; 109 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 110 SDValue &Offset, SDValue &GLC) const; 111 SDNode *SelectAddrSpaceCast(SDNode *N); 112 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 113 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 114 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 115 SDValue &Clamp, SDValue &Omod) const; 116 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 117 SDValue &Clamp, SDValue &Omod) const; 118 119 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 120 SDValue &Omod) const; 121 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 122 SDValue &Clamp, 123 SDValue &Omod) const; 124 125 SDNode *SelectADD_SUB_I64(SDNode *N); 126 SDNode *SelectDIV_SCALE(SDNode *N); 127 128 SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 129 uint32_t Offset, uint32_t Width); 130 SDNode *SelectS_BFEFromShifts(SDNode *N); 131 SDNode *SelectS_BFE(SDNode *N); 132 133 // Include the pieces autogenerated from the target description. 134 #include "AMDGPUGenDAGISel.inc" 135 }; 136 } // end anonymous namespace 137 138 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 139 // DAG, ready for instruction scheduling. 140 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) { 141 return new AMDGPUDAGToDAGISel(TM); 142 } 143 144 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) 145 : SelectionDAGISel(TM) {} 146 147 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 148 Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget()); 149 return SelectionDAGISel::runOnMachineFunction(MF); 150 } 151 152 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 153 } 154 155 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { 156 const SITargetLowering *TL 157 = static_cast<const SITargetLowering *>(getTargetLowering()); 158 return TL->analyzeImmediate(N) == 0; 159 } 160 161 /// \brief Determine the register class for \p OpNo 162 /// \returns The register class of the virtual register that will be used for 163 /// the given operand number \OpNo or NULL if the register class cannot be 164 /// determined. 165 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 166 unsigned OpNo) const { 167 if (!N->isMachineOpcode()) 168 return nullptr; 169 170 switch (N->getMachineOpcode()) { 171 default: { 172 const MCInstrDesc &Desc = 173 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 174 unsigned OpIdx = Desc.getNumDefs() + OpNo; 175 if (OpIdx >= Desc.getNumOperands()) 176 return nullptr; 177 int RegClass = Desc.OpInfo[OpIdx].RegClass; 178 if (RegClass == -1) 179 return nullptr; 180 181 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 182 } 183 case AMDGPU::REG_SEQUENCE: { 184 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 185 const TargetRegisterClass *SuperRC = 186 Subtarget->getRegisterInfo()->getRegClass(RCID); 187 188 SDValue SubRegOp = N->getOperand(OpNo + 1); 189 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 190 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 191 SubRegIdx); 192 } 193 } 194 } 195 196 bool AMDGPUDAGToDAGISel::SelectADDRParam( 197 SDValue Addr, SDValue& R1, SDValue& R2) { 198 199 if (Addr.getOpcode() == ISD::FrameIndex) { 200 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 201 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); 202 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 203 } else { 204 R1 = Addr; 205 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 206 } 207 } else if (Addr.getOpcode() == ISD::ADD) { 208 R1 = Addr.getOperand(0); 209 R2 = Addr.getOperand(1); 210 } else { 211 R1 = Addr; 212 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 213 } 214 return true; 215 } 216 217 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { 218 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 219 Addr.getOpcode() == ISD::TargetGlobalAddress) { 220 return false; 221 } 222 return SelectADDRParam(Addr, R1, R2); 223 } 224 225 226 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { 227 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 228 Addr.getOpcode() == ISD::TargetGlobalAddress) { 229 return false; 230 } 231 232 if (Addr.getOpcode() == ISD::FrameIndex) { 233 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 234 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); 235 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 236 } else { 237 R1 = Addr; 238 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 239 } 240 } else if (Addr.getOpcode() == ISD::ADD) { 241 R1 = Addr.getOperand(0); 242 R2 = Addr.getOperand(1); 243 } else { 244 R1 = Addr; 245 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 246 } 247 return true; 248 } 249 250 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 251 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 252 !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(), 253 AMDGPUAS::LOCAL_ADDRESS)) 254 return N; 255 256 const SITargetLowering& Lowering = 257 *static_cast<const SITargetLowering*>(getTargetLowering()); 258 259 // Write max value to m0 before each load operation 260 261 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 262 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 263 264 SDValue Glue = M0.getValue(1); 265 266 SmallVector <SDValue, 8> Ops; 267 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 268 Ops.push_back(N->getOperand(i)); 269 } 270 Ops.push_back(Glue); 271 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 272 273 return N; 274 } 275 276 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { 277 unsigned int Opc = N->getOpcode(); 278 if (N->isMachineOpcode()) { 279 N->setNodeId(-1); 280 return nullptr; // Already selected. 281 } 282 283 if (isa<AtomicSDNode>(N)) 284 N = glueCopyToM0(N); 285 286 switch (Opc) { 287 default: break; 288 // We are selecting i64 ADD here instead of custom lower it during 289 // DAG legalization, so we can fold some i64 ADDs used for address 290 // calculation into the LOAD and STORE instructions. 291 case ISD::ADD: 292 case ISD::SUB: { 293 if (N->getValueType(0) != MVT::i64 || 294 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 295 break; 296 297 return SelectADD_SUB_I64(N); 298 } 299 case ISD::SCALAR_TO_VECTOR: 300 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 301 case ISD::BUILD_VECTOR: { 302 unsigned RegClassID; 303 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 304 EVT VT = N->getValueType(0); 305 unsigned NumVectorElts = VT.getVectorNumElements(); 306 EVT EltVT = VT.getVectorElementType(); 307 assert(EltVT.bitsEq(MVT::i32)); 308 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 309 bool UseVReg = true; 310 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); 311 U != E; ++U) { 312 if (!U->isMachineOpcode()) { 313 continue; 314 } 315 const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); 316 if (!RC) { 317 continue; 318 } 319 if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) { 320 UseVReg = false; 321 } 322 } 323 switch(NumVectorElts) { 324 case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID : 325 AMDGPU::SReg_32RegClassID; 326 break; 327 case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : 328 AMDGPU::SReg_64RegClassID; 329 break; 330 case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : 331 AMDGPU::SReg_128RegClassID; 332 break; 333 case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : 334 AMDGPU::SReg_256RegClassID; 335 break; 336 case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : 337 AMDGPU::SReg_512RegClassID; 338 break; 339 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 340 } 341 } else { 342 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 343 // that adds a 128 bits reg copy when going through TwoAddressInstructions 344 // pass. We want to avoid 128 bits copies as much as possible because they 345 // can't be bundled by our scheduler. 346 switch(NumVectorElts) { 347 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 348 case 4: 349 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 350 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 351 else 352 RegClassID = AMDGPU::R600_Reg128RegClassID; 353 break; 354 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 355 } 356 } 357 358 SDLoc DL(N); 359 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 360 361 if (NumVectorElts == 1) { 362 return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, 363 N->getOperand(0), RegClass); 364 } 365 366 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 367 "supported yet"); 368 // 16 = Max Num Vector Elements 369 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 370 // 1 = Vector Register Class 371 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 372 373 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 374 bool IsRegSeq = true; 375 unsigned NOps = N->getNumOperands(); 376 for (unsigned i = 0; i < NOps; i++) { 377 // XXX: Why is this here? 378 if (isa<RegisterSDNode>(N->getOperand(i))) { 379 IsRegSeq = false; 380 break; 381 } 382 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 383 RegSeqArgs[1 + (2 * i) + 1] = 384 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 385 MVT::i32); 386 } 387 388 if (NOps != NumVectorElts) { 389 // Fill in the missing undef elements if this was a scalar_to_vector. 390 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 391 392 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 393 DL, EltVT); 394 for (unsigned i = NOps; i < NumVectorElts; ++i) { 395 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 396 RegSeqArgs[1 + (2 * i) + 1] = 397 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 398 } 399 } 400 401 if (!IsRegSeq) 402 break; 403 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), 404 RegSeqArgs); 405 } 406 case ISD::BUILD_PAIR: { 407 SDValue RC, SubReg0, SubReg1; 408 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 409 break; 410 } 411 SDLoc DL(N); 412 if (N->getValueType(0) == MVT::i128) { 413 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 414 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 415 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 416 } else if (N->getValueType(0) == MVT::i64) { 417 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 418 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 419 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 420 } else { 421 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 422 } 423 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 424 N->getOperand(1), SubReg1 }; 425 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 426 DL, N->getValueType(0), Ops); 427 } 428 429 case ISD::Constant: 430 case ISD::ConstantFP: { 431 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 432 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 433 break; 434 435 uint64_t Imm; 436 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 437 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 438 else { 439 ConstantSDNode *C = cast<ConstantSDNode>(N); 440 Imm = C->getZExtValue(); 441 } 442 443 SDLoc DL(N); 444 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 445 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 446 MVT::i32)); 447 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 448 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 449 const SDValue Ops[] = { 450 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 451 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 452 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 453 }; 454 455 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 456 N->getValueType(0), Ops); 457 } 458 459 case ISD::LOAD: { 460 LoadSDNode *LD = cast<LoadSDNode>(N); 461 SDLoc SL(N); 462 EVT VT = N->getValueType(0); 463 464 if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) { 465 N = glueCopyToM0(N); 466 break; 467 } 468 469 // To simplify the TableGen patters, we replace all i64 loads with 470 // v2i32 loads. Alternatively, we could promote i64 loads to v2i32 471 // during DAG legalization, however, so places (ExpandUnalignedLoad) 472 // in the DAG legalizer assume that if i64 is legal, so doing this 473 // promotion early can cause problems. 474 475 SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(), 476 LD->getBasePtr(), LD->getMemOperand()); 477 SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, 478 MVT::i64, NewLoad); 479 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1)); 480 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast); 481 SDNode *Load = glueCopyToM0(NewLoad.getNode()); 482 SelectCode(Load); 483 N = BitCast.getNode(); 484 break; 485 } 486 487 case ISD::STORE: { 488 // Handle i64 stores here for the same reason mentioned above for loads. 489 StoreSDNode *ST = cast<StoreSDNode>(N); 490 SDValue Value = ST->getValue(); 491 if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) { 492 493 SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N), 494 MVT::v2i32, Value); 495 SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue, 496 ST->getBasePtr(), ST->getMemOperand()); 497 498 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore); 499 500 if (NewValue.getOpcode() == ISD::BITCAST) { 501 Select(NewStore.getNode()); 502 return SelectCode(NewValue.getNode()); 503 } 504 505 // getNode() may fold the bitcast if its input was another bitcast. If that 506 // happens we should only select the new store. 507 N = NewStore.getNode(); 508 } 509 510 N = glueCopyToM0(N); 511 break; 512 } 513 514 case AMDGPUISD::REGISTER_LOAD: { 515 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 516 break; 517 SDValue Addr, Offset; 518 519 SDLoc DL(N); 520 SelectADDRIndirect(N->getOperand(1), Addr, Offset); 521 const SDValue Ops[] = { 522 Addr, 523 Offset, 524 CurDAG->getTargetConstant(0, DL, MVT::i32), 525 N->getOperand(0), 526 }; 527 return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL, 528 CurDAG->getVTList(MVT::i32, MVT::i64, 529 MVT::Other), 530 Ops); 531 } 532 case AMDGPUISD::REGISTER_STORE: { 533 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 534 break; 535 SDValue Addr, Offset; 536 SelectADDRIndirect(N->getOperand(2), Addr, Offset); 537 SDLoc DL(N); 538 const SDValue Ops[] = { 539 N->getOperand(1), 540 Addr, 541 Offset, 542 CurDAG->getTargetConstant(0, DL, MVT::i32), 543 N->getOperand(0), 544 }; 545 return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL, 546 CurDAG->getVTList(MVT::Other), 547 Ops); 548 } 549 550 case AMDGPUISD::BFE_I32: 551 case AMDGPUISD::BFE_U32: { 552 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 553 break; 554 555 // There is a scalar version available, but unlike the vector version which 556 // has a separate operand for the offset and width, the scalar version packs 557 // the width and offset into a single operand. Try to move to the scalar 558 // version if the offsets are constant, so that we can try to keep extended 559 // loads of kernel arguments in SGPRs. 560 561 // TODO: Technically we could try to pattern match scalar bitshifts of 562 // dynamic values, but it's probably not useful. 563 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 564 if (!Offset) 565 break; 566 567 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 568 if (!Width) 569 break; 570 571 bool Signed = Opc == AMDGPUISD::BFE_I32; 572 573 uint32_t OffsetVal = Offset->getZExtValue(); 574 uint32_t WidthVal = Width->getZExtValue(); 575 576 return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), 577 N->getOperand(0), OffsetVal, WidthVal); 578 579 } 580 case AMDGPUISD::DIV_SCALE: { 581 return SelectDIV_SCALE(N); 582 } 583 case ISD::CopyToReg: { 584 const SITargetLowering& Lowering = 585 *static_cast<const SITargetLowering*>(getTargetLowering()); 586 Lowering.legalizeTargetIndependentNode(N, *CurDAG); 587 break; 588 } 589 case ISD::ADDRSPACECAST: 590 return SelectAddrSpaceCast(N); 591 case ISD::AND: 592 case ISD::SRL: 593 case ISD::SRA: 594 if (N->getValueType(0) != MVT::i32 || 595 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 596 break; 597 598 return SelectS_BFE(N); 599 } 600 601 return SelectCode(N); 602 } 603 604 605 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { 606 assert(AS != 0 && "Use checkPrivateAddress instead."); 607 if (!Ptr) 608 return false; 609 610 return Ptr->getType()->getPointerAddressSpace() == AS; 611 } 612 613 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) { 614 if (Op->getPseudoValue()) 615 return true; 616 617 if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType())) 618 return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; 619 620 return false; 621 } 622 623 bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { 624 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 625 } 626 627 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { 628 const Value *MemVal = N->getMemOperand()->getValue(); 629 return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 630 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 631 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS)); 632 } 633 634 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { 635 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 636 } 637 638 bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) { 639 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 640 } 641 642 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { 643 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 644 } 645 646 bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const { 647 const Value *MemVal = N->getMemOperand()->getValue(); 648 if (CbId == -1) 649 return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS); 650 651 return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId); 652 } 653 654 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { 655 if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) 656 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 657 N->getMemoryVT().bitsLT(MVT::i32)) 658 return true; 659 660 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 661 } 662 663 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { 664 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS); 665 } 666 667 bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { 668 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 669 } 670 671 bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const { 672 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 673 } 674 675 bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { 676 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 677 } 678 679 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { 680 MachineMemOperand *MMO = N->getMemOperand(); 681 if (checkPrivateAddress(N->getMemOperand())) { 682 if (MMO) { 683 const PseudoSourceValue *PSV = MMO->getPseudoValue(); 684 if (PSV && PSV == PseudoSourceValue::getConstantPool()) { 685 return true; 686 } 687 } 688 } 689 return false; 690 } 691 692 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { 693 if (checkPrivateAddress(N->getMemOperand())) { 694 // Check to make sure we are not a constant pool load or a constant load 695 // that is marked as a private load 696 if (isCPLoad(N) || isConstantLoad(N, -1)) { 697 return false; 698 } 699 } 700 701 const Value *MemVal = N->getMemOperand()->getValue(); 702 if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 703 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 704 !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) && 705 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) && 706 !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) && 707 !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) && 708 !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) { 709 return true; 710 } 711 return false; 712 } 713 714 const char *AMDGPUDAGToDAGISel::getPassName() const { 715 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 716 } 717 718 #ifdef DEBUGTMP 719 #undef INT64_C 720 #endif 721 #undef DEBUGTMP 722 723 //===----------------------------------------------------------------------===// 724 // Complex Patterns 725 //===----------------------------------------------------------------------===// 726 727 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 728 SDValue& IntPtr) { 729 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 730 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 731 true); 732 return true; 733 } 734 return false; 735 } 736 737 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 738 SDValue& BaseReg, SDValue &Offset) { 739 if (!isa<ConstantSDNode>(Addr)) { 740 BaseReg = Addr; 741 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 742 return true; 743 } 744 return false; 745 } 746 747 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 748 SDValue &Offset) { 749 ConstantSDNode *IMMOffset; 750 751 if (Addr.getOpcode() == ISD::ADD 752 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 753 && isInt<16>(IMMOffset->getZExtValue())) { 754 755 Base = Addr.getOperand(0); 756 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 757 MVT::i32); 758 return true; 759 // If the pointer address is constant, we can move it to the offset field. 760 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 761 && isInt<16>(IMMOffset->getZExtValue())) { 762 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 763 SDLoc(CurDAG->getEntryNode()), 764 AMDGPU::ZERO, MVT::i32); 765 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 766 MVT::i32); 767 return true; 768 } 769 770 // Default case, no offset 771 Base = Addr; 772 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 773 return true; 774 } 775 776 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 777 SDValue &Offset) { 778 ConstantSDNode *C; 779 SDLoc DL(Addr); 780 781 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 782 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 783 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 784 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 785 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 786 Base = Addr.getOperand(0); 787 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 788 } else { 789 Base = Addr; 790 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 791 } 792 793 return true; 794 } 795 796 SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 797 SDLoc DL(N); 798 SDValue LHS = N->getOperand(0); 799 SDValue RHS = N->getOperand(1); 800 801 bool IsAdd = (N->getOpcode() == ISD::ADD); 802 803 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 804 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 805 806 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 807 DL, MVT::i32, LHS, Sub0); 808 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 809 DL, MVT::i32, LHS, Sub1); 810 811 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 812 DL, MVT::i32, RHS, Sub0); 813 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 814 DL, MVT::i32, RHS, Sub1); 815 816 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 817 SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 818 819 820 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 821 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 822 823 SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); 824 SDValue Carry(AddLo, 1); 825 SDNode *AddHi 826 = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, 827 SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); 828 829 SDValue Args[5] = { 830 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 831 SDValue(AddLo,0), 832 Sub0, 833 SDValue(AddHi,0), 834 Sub1, 835 }; 836 return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); 837 } 838 839 // We need to handle this here because tablegen doesn't support matching 840 // instructions with multiple outputs. 841 SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 842 SDLoc SL(N); 843 EVT VT = N->getValueType(0); 844 845 assert(VT == MVT::f32 || VT == MVT::f64); 846 847 unsigned Opc 848 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 849 850 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod 851 SDValue Ops[8]; 852 853 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 854 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 855 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 856 return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 857 } 858 859 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 860 unsigned OffsetBits) const { 861 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 862 (OffsetBits == 8 && !isUInt<8>(Offset))) 863 return false; 864 865 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 866 Subtarget->unsafeDSOffsetFoldingEnabled()) 867 return true; 868 869 // On Southern Islands instruction with a negative base value and an offset 870 // don't seem to work. 871 return CurDAG->SignBitIsZero(Base); 872 } 873 874 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 875 SDValue &Offset) const { 876 if (CurDAG->isBaseWithConstantOffset(Addr)) { 877 SDValue N0 = Addr.getOperand(0); 878 SDValue N1 = Addr.getOperand(1); 879 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 880 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 881 // (add n0, c0) 882 Base = N0; 883 Offset = N1; 884 return true; 885 } 886 } 887 888 SDLoc DL(Addr); 889 890 // If we have a constant address, prefer to put the constant into the 891 // offset. This can save moves to load the constant address since multiple 892 // operations can share the zero base address register, and enables merging 893 // into read2 / write2 instructions. 894 if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 895 if (isUInt<16>(CAddr->getZExtValue())) { 896 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 897 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 898 DL, MVT::i32, Zero); 899 Base = SDValue(MovZero, 0); 900 Offset = Addr; 901 return true; 902 } 903 } 904 905 // default case 906 Base = Addr; 907 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 908 return true; 909 } 910 911 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 912 SDValue &Offset0, 913 SDValue &Offset1) const { 914 SDLoc DL(Addr); 915 916 if (CurDAG->isBaseWithConstantOffset(Addr)) { 917 SDValue N0 = Addr.getOperand(0); 918 SDValue N1 = Addr.getOperand(1); 919 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 920 unsigned DWordOffset0 = C1->getZExtValue() / 4; 921 unsigned DWordOffset1 = DWordOffset0 + 1; 922 // (add n0, c0) 923 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 924 Base = N0; 925 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 926 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 927 return true; 928 } 929 } 930 931 if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 932 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 933 unsigned DWordOffset1 = DWordOffset0 + 1; 934 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 935 936 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 937 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 938 MachineSDNode *MovZero 939 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 940 DL, MVT::i32, Zero); 941 Base = SDValue(MovZero, 0); 942 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 943 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 944 return true; 945 } 946 } 947 948 // default case 949 Base = Addr; 950 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 951 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 952 return true; 953 } 954 955 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 956 return isUInt<12>(Imm->getZExtValue()); 957 } 958 959 void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 960 SDValue &VAddr, SDValue &SOffset, 961 SDValue &Offset, SDValue &Offen, 962 SDValue &Idxen, SDValue &Addr64, 963 SDValue &GLC, SDValue &SLC, 964 SDValue &TFE) const { 965 SDLoc DL(Addr); 966 967 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 968 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 969 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 970 971 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 972 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 973 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 974 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 975 976 if (CurDAG->isBaseWithConstantOffset(Addr)) { 977 SDValue N0 = Addr.getOperand(0); 978 SDValue N1 = Addr.getOperand(1); 979 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 980 981 if (N0.getOpcode() == ISD::ADD) { 982 // (add (add N2, N3), C1) -> addr64 983 SDValue N2 = N0.getOperand(0); 984 SDValue N3 = N0.getOperand(1); 985 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 986 Ptr = N2; 987 VAddr = N3; 988 } else { 989 990 // (add N0, C1) -> offset 991 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 992 Ptr = N0; 993 } 994 995 if (isLegalMUBUFImmOffset(C1)) { 996 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 997 return; 998 } else if (isUInt<32>(C1->getZExtValue())) { 999 // Illegal offset, store it in soffset. 1000 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1001 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1002 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 1003 0); 1004 return; 1005 } 1006 } 1007 1008 if (Addr.getOpcode() == ISD::ADD) { 1009 // (add N0, N1) -> addr64 1010 SDValue N0 = Addr.getOperand(0); 1011 SDValue N1 = Addr.getOperand(1); 1012 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1013 Ptr = N0; 1014 VAddr = N1; 1015 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1016 return; 1017 } 1018 1019 // default case -> offset 1020 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1021 Ptr = Addr; 1022 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1023 1024 } 1025 1026 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1027 SDValue &VAddr, SDValue &SOffset, 1028 SDValue &Offset, SDValue &GLC, 1029 SDValue &SLC, SDValue &TFE) const { 1030 SDValue Ptr, Offen, Idxen, Addr64; 1031 1032 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1033 GLC, SLC, TFE); 1034 1035 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1036 if (C->getSExtValue()) { 1037 SDLoc DL(Addr); 1038 1039 const SITargetLowering& Lowering = 1040 *static_cast<const SITargetLowering*>(getTargetLowering()); 1041 1042 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1043 return true; 1044 } 1045 1046 return false; 1047 } 1048 1049 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1050 SDValue &VAddr, SDValue &SOffset, 1051 SDValue &Offset, 1052 SDValue &SLC) const { 1053 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1054 SDValue GLC, TFE; 1055 1056 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1057 } 1058 1059 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 1060 SDValue &VAddr, SDValue &SOffset, 1061 SDValue &ImmOffset) const { 1062 1063 SDLoc DL(Addr); 1064 MachineFunction &MF = CurDAG->getMachineFunction(); 1065 const SIRegisterInfo *TRI = 1066 static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); 1067 MachineRegisterInfo &MRI = MF.getRegInfo(); 1068 const SITargetLowering& Lowering = 1069 *static_cast<const SITargetLowering*>(getTargetLowering()); 1070 1071 unsigned ScratchOffsetReg = 1072 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); 1073 Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, 1074 ScratchOffsetReg, MVT::i32); 1075 SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); 1076 SDValue ScratchRsrcDword0 = 1077 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); 1078 1079 SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); 1080 SDValue ScratchRsrcDword1 = 1081 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); 1082 1083 const SDValue RsrcOps[] = { 1084 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 1085 ScratchRsrcDword0, 1086 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1087 ScratchRsrcDword1, 1088 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32), 1089 }; 1090 SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, 1091 MVT::v2i32, RsrcOps), 0); 1092 Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); 1093 SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 1094 MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); 1095 1096 // (add n0, c1) 1097 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1098 SDValue N1 = Addr.getOperand(1); 1099 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1100 1101 if (isLegalMUBUFImmOffset(C1)) { 1102 VAddr = Addr.getOperand(0); 1103 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1104 return true; 1105 } 1106 } 1107 1108 // (node) 1109 VAddr = Addr; 1110 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1111 return true; 1112 } 1113 1114 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1115 SDValue &SOffset, SDValue &Offset, 1116 SDValue &GLC, SDValue &SLC, 1117 SDValue &TFE) const { 1118 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1119 const SIInstrInfo *TII = 1120 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1121 1122 SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1123 GLC, SLC, TFE); 1124 1125 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1126 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1127 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1128 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1129 APInt::getAllOnesValue(32).getZExtValue(); // Size 1130 SDLoc DL(Addr); 1131 1132 const SITargetLowering& Lowering = 1133 *static_cast<const SITargetLowering*>(getTargetLowering()); 1134 1135 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1136 return true; 1137 } 1138 return false; 1139 } 1140 1141 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1142 SDValue &Soffset, SDValue &Offset, 1143 SDValue &GLC) const { 1144 SDValue SLC, TFE; 1145 1146 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1147 } 1148 1149 // FIXME: This is incorrect and only enough to be able to compile. 1150 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { 1151 AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); 1152 SDLoc DL(N); 1153 1154 assert(Subtarget->hasFlatAddressSpace() && 1155 "addrspacecast only supported with flat address space!"); 1156 1157 assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && 1158 ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) && 1159 "Cannot cast address space to / from constant address!"); 1160 1161 assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || 1162 ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && 1163 "Can only cast to / from flat address space!"); 1164 1165 // The flat instructions read the address as the index of the VGPR holding the 1166 // address, so casting should just be reinterpreting the base VGPR, so just 1167 // insert trunc / bitcast / zext. 1168 1169 SDValue Src = ASC->getOperand(0); 1170 EVT DestVT = ASC->getValueType(0); 1171 EVT SrcVT = Src.getValueType(); 1172 1173 unsigned SrcSize = SrcVT.getSizeInBits(); 1174 unsigned DestSize = DestVT.getSizeInBits(); 1175 1176 if (SrcSize > DestSize) { 1177 assert(SrcSize == 64 && DestSize == 32); 1178 return CurDAG->getMachineNode( 1179 TargetOpcode::EXTRACT_SUBREG, 1180 DL, 1181 DestVT, 1182 Src, 1183 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); 1184 } 1185 1186 1187 if (DestSize > SrcSize) { 1188 assert(SrcSize == 32 && DestSize == 64); 1189 1190 // FIXME: This is probably wrong, we should never be defining 1191 // a register class with both VGPRs and SGPRs 1192 SDValue RC = CurDAG->getTargetConstant(AMDGPU::VS_64RegClassID, DL, 1193 MVT::i32); 1194 1195 const SDValue Ops[] = { 1196 RC, 1197 Src, 1198 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 1199 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1200 CurDAG->getConstant(0, DL, MVT::i32)), 0), 1201 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 1202 }; 1203 1204 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, 1205 DL, N->getValueType(0), Ops); 1206 } 1207 1208 assert(SrcSize == 64 && DestSize == 64); 1209 return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode(); 1210 } 1211 1212 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val, 1213 uint32_t Offset, uint32_t Width) { 1214 // Transformation function, pack the offset and width of a BFE into 1215 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1216 // source, bits [5:0] contain the offset and bits [22:16] the width. 1217 uint32_t PackedVal = Offset | (Width << 16); 1218 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1219 1220 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1221 } 1222 1223 SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1224 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1225 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1226 // Predicate: 0 < b <= c < 32 1227 1228 const SDValue &Shl = N->getOperand(0); 1229 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1230 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1231 1232 if (B && C) { 1233 uint32_t BVal = B->getZExtValue(); 1234 uint32_t CVal = C->getZExtValue(); 1235 1236 if (0 < BVal && BVal <= CVal && CVal < 32) { 1237 bool Signed = N->getOpcode() == ISD::SRA; 1238 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1239 1240 return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), 1241 CVal - BVal, 32 - CVal); 1242 } 1243 } 1244 return SelectCode(N); 1245 } 1246 1247 SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1248 switch (N->getOpcode()) { 1249 case ISD::AND: 1250 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1251 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1252 // Predicate: isMask(mask) 1253 const SDValue &Srl = N->getOperand(0); 1254 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1255 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1256 1257 if (Shift && Mask) { 1258 uint32_t ShiftVal = Shift->getZExtValue(); 1259 uint32_t MaskVal = Mask->getZExtValue(); 1260 1261 if (isMask_32(MaskVal)) { 1262 uint32_t WidthVal = countPopulation(MaskVal); 1263 1264 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0), 1265 ShiftVal, WidthVal); 1266 } 1267 } 1268 } 1269 break; 1270 case ISD::SRL: 1271 if (N->getOperand(0).getOpcode() == ISD::AND) { 1272 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1273 // Predicate: isMask(mask >> b) 1274 const SDValue &And = N->getOperand(0); 1275 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1276 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1277 1278 if (Shift && Mask) { 1279 uint32_t ShiftVal = Shift->getZExtValue(); 1280 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1281 1282 if (isMask_32(MaskVal)) { 1283 uint32_t WidthVal = countPopulation(MaskVal); 1284 1285 return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0), 1286 ShiftVal, WidthVal); 1287 } 1288 } 1289 } else if (N->getOperand(0).getOpcode() == ISD::SHL) 1290 return SelectS_BFEFromShifts(N); 1291 break; 1292 case ISD::SRA: 1293 if (N->getOperand(0).getOpcode() == ISD::SHL) 1294 return SelectS_BFEFromShifts(N); 1295 break; 1296 } 1297 1298 return SelectCode(N); 1299 } 1300 1301 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1302 SDValue &SrcMods) const { 1303 1304 unsigned Mods = 0; 1305 1306 Src = In; 1307 1308 if (Src.getOpcode() == ISD::FNEG) { 1309 Mods |= SISrcMods::NEG; 1310 Src = Src.getOperand(0); 1311 } 1312 1313 if (Src.getOpcode() == ISD::FABS) { 1314 Mods |= SISrcMods::ABS; 1315 Src = Src.getOperand(0); 1316 } 1317 1318 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1319 1320 return true; 1321 } 1322 1323 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1324 SDValue &SrcMods) const { 1325 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1326 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1327 } 1328 1329 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1330 SDValue &SrcMods, SDValue &Clamp, 1331 SDValue &Omod) const { 1332 SDLoc DL(In); 1333 // FIXME: Handle Clamp and Omod 1334 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1335 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1336 1337 return SelectVOP3Mods(In, Src, SrcMods); 1338 } 1339 1340 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1341 SDValue &SrcMods, SDValue &Clamp, 1342 SDValue &Omod) const { 1343 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1344 1345 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1346 cast<ConstantSDNode>(Clamp)->isNullValue() && 1347 cast<ConstantSDNode>(Omod)->isNullValue(); 1348 } 1349 1350 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1351 SDValue &SrcMods, 1352 SDValue &Omod) const { 1353 // FIXME: Handle Omod 1354 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1355 1356 return SelectVOP3Mods(In, Src, SrcMods); 1357 } 1358 1359 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1360 SDValue &SrcMods, 1361 SDValue &Clamp, 1362 SDValue &Omod) const { 1363 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1364 return SelectVOP3Mods(In, Src, SrcMods); 1365 } 1366 1367 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1368 const AMDGPUTargetLowering& Lowering = 1369 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1370 bool IsModified = false; 1371 do { 1372 IsModified = false; 1373 // Go over all selected nodes and try to fold them a bit more 1374 for (SDNode &Node : CurDAG->allnodes()) { 1375 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1376 if (!MachineNode) 1377 continue; 1378 1379 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1380 if (ResNode != &Node) { 1381 ReplaceUses(&Node, ResNode); 1382 IsModified = true; 1383 } 1384 } 1385 CurDAG->RemoveDeadNodes(); 1386 } while (IsModified); 1387 } 1388