1 //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Defines an instruction selector for the AMDGPU target. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPUInstrInfo.h" 16 #include "AMDGPUIntrinsicInfo.h" 17 #include "AMDGPUISelLowering.h" // For AMDGPUISD 18 #include "AMDGPUSubtarget.h" 19 #include "SIISelLowering.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/Analysis/ValueTracking.h" 22 #include "llvm/CodeGen/FunctionLoweringInfo.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/PseudoSourceValue.h" 25 #include "llvm/CodeGen/SelectionDAG.h" 26 #include "llvm/CodeGen/SelectionDAGISel.h" 27 #include "llvm/IR/DiagnosticInfo.h" 28 29 using namespace llvm; 30 31 namespace llvm { 32 class R600InstrInfo; 33 } 34 35 //===----------------------------------------------------------------------===// 36 // Instruction Selector Implementation 37 //===----------------------------------------------------------------------===// 38 39 namespace { 40 41 static bool isCBranchSCC(const SDNode *N) { 42 assert(N->getOpcode() == ISD::BRCOND); 43 if (!N->hasOneUse()) 44 return false; 45 46 SDValue Cond = N->getOperand(1); 47 if (Cond.getOpcode() == ISD::CopyToReg) 48 Cond = Cond.getOperand(2); 49 return Cond.getOpcode() == ISD::SETCC && 50 Cond.getOperand(0).getValueType() == MVT::i32 && 51 Cond.hasOneUse(); 52 } 53 54 /// AMDGPU specific code to select AMDGPU machine instructions for 55 /// SelectionDAG operations. 56 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 57 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 58 // make the right decision when generating code for different targets. 59 const AMDGPUSubtarget *Subtarget; 60 61 public: 62 AMDGPUDAGToDAGISel(TargetMachine &TM); 63 virtual ~AMDGPUDAGToDAGISel(); 64 bool runOnMachineFunction(MachineFunction &MF) override; 65 void Select(SDNode *N) override; 66 const char *getPassName() const override; 67 void PreprocessISelDAG() override; 68 void PostprocessISelDAG() override; 69 70 private: 71 bool isInlineImmediate(SDNode *N) const; 72 bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, 73 const R600InstrInfo *TII); 74 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 75 bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &); 76 77 // Complex pattern selectors 78 bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); 79 bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); 80 bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); 81 82 static bool checkType(const Value *ptr, unsigned int addrspace); 83 static bool checkPrivateAddress(const MachineMemOperand *Op); 84 85 static bool isGlobalStore(const MemSDNode *N); 86 static bool isFlatStore(const MemSDNode *N); 87 static bool isPrivateStore(const StoreSDNode *N); 88 static bool isLocalStore(const StoreSDNode *N); 89 static bool isRegionStore(const StoreSDNode *N); 90 91 bool isCPLoad(const LoadSDNode *N) const; 92 bool isConstantLoad(const MemSDNode *N, int cbID) const; 93 bool isGlobalLoad(const MemSDNode *N) const; 94 bool isFlatLoad(const MemSDNode *N) const; 95 bool isParamLoad(const LoadSDNode *N) const; 96 bool isPrivateLoad(const LoadSDNode *N) const; 97 bool isLocalLoad(const LoadSDNode *N) const; 98 bool isRegionLoad(const LoadSDNode *N) const; 99 100 bool isUniformBr(const SDNode *N) const; 101 102 SDNode *glueCopyToM0(SDNode *N) const; 103 104 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 105 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); 106 bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, 107 SDValue& Offset); 108 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 109 bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 110 bool isDSOffsetLegal(const SDValue &Base, unsigned Offset, 111 unsigned OffsetBits) const; 112 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 113 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 114 SDValue &Offset1) const; 115 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 116 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 117 SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, 118 SDValue &TFE) const; 119 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 120 SDValue &SOffset, SDValue &Offset, SDValue &GLC, 121 SDValue &SLC, SDValue &TFE) const; 122 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 123 SDValue &VAddr, SDValue &SOffset, SDValue &Offset, 124 SDValue &SLC) const; 125 bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, 126 SDValue &SOffset, SDValue &ImmOffset) const; 127 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, 128 SDValue &Offset, SDValue &GLC, SDValue &SLC, 129 SDValue &TFE) const; 130 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 131 SDValue &Offset, SDValue &SLC) const; 132 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 133 SDValue &Offset) const; 134 void SelectMUBUFConstant(SDValue Constant, 135 SDValue &SOffset, 136 SDValue &ImmOffset) const; 137 bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset, 138 SDValue &ImmOffset) const; 139 bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, 140 SDValue &ImmOffset, SDValue &VOffset) const; 141 142 bool SelectFlat(SDValue Addr, SDValue &VAddr, 143 SDValue &SLC, SDValue &TFE) const; 144 145 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, 146 bool &Imm) const; 147 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, 148 bool &Imm) const; 149 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 150 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 151 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 152 bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; 153 bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; 154 bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; 155 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 156 bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 157 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 158 SDValue &Clamp, SDValue &Omod) const; 159 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 160 SDValue &Clamp, SDValue &Omod) const; 161 162 bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, 163 SDValue &Omod) const; 164 bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, 165 SDValue &Clamp, 166 SDValue &Omod) const; 167 168 void SelectADD_SUB_I64(SDNode *N); 169 void SelectDIV_SCALE(SDNode *N); 170 171 SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, 172 uint32_t Offset, uint32_t Width); 173 void SelectS_BFEFromShifts(SDNode *N); 174 void SelectS_BFE(SDNode *N); 175 void SelectBRCOND(SDNode *N); 176 void SelectATOMIC_CMP_SWAP(SDNode *N); 177 178 // Include the pieces autogenerated from the target description. 179 #include "AMDGPUGenDAGISel.inc" 180 }; 181 } // end anonymous namespace 182 183 /// \brief This pass converts a legalized DAG into a AMDGPU-specific 184 // DAG, ready for instruction scheduling. 185 FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) { 186 return new AMDGPUDAGToDAGISel(TM); 187 } 188 189 AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) 190 : SelectionDAGISel(TM) {} 191 192 bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { 193 Subtarget = &static_cast<const AMDGPUSubtarget &>(MF.getSubtarget()); 194 return SelectionDAGISel::runOnMachineFunction(MF); 195 } 196 197 AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { 198 } 199 200 bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { 201 const SITargetLowering *TL 202 = static_cast<const SITargetLowering *>(getTargetLowering()); 203 return TL->analyzeImmediate(N) == 0; 204 } 205 206 /// \brief Determine the register class for \p OpNo 207 /// \returns The register class of the virtual register that will be used for 208 /// the given operand number \OpNo or NULL if the register class cannot be 209 /// determined. 210 const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, 211 unsigned OpNo) const { 212 if (!N->isMachineOpcode()) 213 return nullptr; 214 215 switch (N->getMachineOpcode()) { 216 default: { 217 const MCInstrDesc &Desc = 218 Subtarget->getInstrInfo()->get(N->getMachineOpcode()); 219 unsigned OpIdx = Desc.getNumDefs() + OpNo; 220 if (OpIdx >= Desc.getNumOperands()) 221 return nullptr; 222 int RegClass = Desc.OpInfo[OpIdx].RegClass; 223 if (RegClass == -1) 224 return nullptr; 225 226 return Subtarget->getRegisterInfo()->getRegClass(RegClass); 227 } 228 case AMDGPU::REG_SEQUENCE: { 229 unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 230 const TargetRegisterClass *SuperRC = 231 Subtarget->getRegisterInfo()->getRegClass(RCID); 232 233 SDValue SubRegOp = N->getOperand(OpNo + 1); 234 unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); 235 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, 236 SubRegIdx); 237 } 238 } 239 } 240 241 bool AMDGPUDAGToDAGISel::SelectADDRParam( 242 SDValue Addr, SDValue& R1, SDValue& R2) { 243 244 if (Addr.getOpcode() == ISD::FrameIndex) { 245 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 246 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); 247 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 248 } else { 249 R1 = Addr; 250 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 251 } 252 } else if (Addr.getOpcode() == ISD::ADD) { 253 R1 = Addr.getOperand(0); 254 R2 = Addr.getOperand(1); 255 } else { 256 R1 = Addr; 257 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 258 } 259 return true; 260 } 261 262 bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { 263 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 264 Addr.getOpcode() == ISD::TargetGlobalAddress) { 265 return false; 266 } 267 return SelectADDRParam(Addr, R1, R2); 268 } 269 270 271 bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { 272 if (Addr.getOpcode() == ISD::TargetExternalSymbol || 273 Addr.getOpcode() == ISD::TargetGlobalAddress) { 274 return false; 275 } 276 277 if (Addr.getOpcode() == ISD::FrameIndex) { 278 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 279 R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); 280 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 281 } else { 282 R1 = Addr; 283 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 284 } 285 } else if (Addr.getOpcode() == ISD::ADD) { 286 R1 = Addr.getOperand(0); 287 R2 = Addr.getOperand(1); 288 } else { 289 R1 = Addr; 290 R2 = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i64); 291 } 292 return true; 293 } 294 295 SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { 296 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 297 !checkType(cast<MemSDNode>(N)->getMemOperand()->getValue(), 298 AMDGPUAS::LOCAL_ADDRESS)) 299 return N; 300 301 const SITargetLowering& Lowering = 302 *static_cast<const SITargetLowering*>(getTargetLowering()); 303 304 // Write max value to m0 before each load operation 305 306 SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N), 307 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); 308 309 SDValue Glue = M0.getValue(1); 310 311 SmallVector <SDValue, 8> Ops; 312 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 313 Ops.push_back(N->getOperand(i)); 314 } 315 Ops.push_back(Glue); 316 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); 317 318 return N; 319 } 320 321 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { 322 switch (NumVectorElts) { 323 case 1: 324 return AMDGPU::SReg_32RegClassID; 325 case 2: 326 return AMDGPU::SReg_64RegClassID; 327 case 4: 328 return AMDGPU::SReg_128RegClassID; 329 case 8: 330 return AMDGPU::SReg_256RegClassID; 331 case 16: 332 return AMDGPU::SReg_512RegClassID; 333 } 334 335 llvm_unreachable("invalid vector size"); 336 } 337 338 void AMDGPUDAGToDAGISel::Select(SDNode *N) { 339 unsigned int Opc = N->getOpcode(); 340 if (N->isMachineOpcode()) { 341 N->setNodeId(-1); 342 return; // Already selected. 343 } 344 345 if (isa<AtomicSDNode>(N) || 346 (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC)) 347 N = glueCopyToM0(N); 348 349 switch (Opc) { 350 default: break; 351 // We are selecting i64 ADD here instead of custom lower it during 352 // DAG legalization, so we can fold some i64 ADDs used for address 353 // calculation into the LOAD and STORE instructions. 354 case ISD::ADD: 355 case ISD::SUB: { 356 if (N->getValueType(0) != MVT::i64 || 357 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 358 break; 359 360 SelectADD_SUB_I64(N); 361 return; 362 } 363 case ISD::SCALAR_TO_VECTOR: 364 case AMDGPUISD::BUILD_VERTICAL_VECTOR: 365 case ISD::BUILD_VECTOR: { 366 unsigned RegClassID; 367 const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo(); 368 EVT VT = N->getValueType(0); 369 unsigned NumVectorElts = VT.getVectorNumElements(); 370 EVT EltVT = VT.getVectorElementType(); 371 assert(EltVT.bitsEq(MVT::i32)); 372 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 373 RegClassID = selectSGPRVectorRegClassID(NumVectorElts); 374 } else { 375 // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG 376 // that adds a 128 bits reg copy when going through TwoAddressInstructions 377 // pass. We want to avoid 128 bits copies as much as possible because they 378 // can't be bundled by our scheduler. 379 switch(NumVectorElts) { 380 case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; 381 case 4: 382 if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) 383 RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; 384 else 385 RegClassID = AMDGPU::R600_Reg128RegClassID; 386 break; 387 default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); 388 } 389 } 390 391 SDLoc DL(N); 392 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 393 394 if (NumVectorElts == 1) { 395 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), 396 RegClass); 397 return; 398 } 399 400 assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not " 401 "supported yet"); 402 // 16 = Max Num Vector Elements 403 // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) 404 // 1 = Vector Register Class 405 SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); 406 407 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); 408 bool IsRegSeq = true; 409 unsigned NOps = N->getNumOperands(); 410 for (unsigned i = 0; i < NOps; i++) { 411 // XXX: Why is this here? 412 if (isa<RegisterSDNode>(N->getOperand(i))) { 413 IsRegSeq = false; 414 break; 415 } 416 RegSeqArgs[1 + (2 * i)] = N->getOperand(i); 417 RegSeqArgs[1 + (2 * i) + 1] = 418 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, 419 MVT::i32); 420 } 421 422 if (NOps != NumVectorElts) { 423 // Fill in the missing undef elements if this was a scalar_to_vector. 424 assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); 425 426 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 427 DL, EltVT); 428 for (unsigned i = NOps; i < NumVectorElts; ++i) { 429 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); 430 RegSeqArgs[1 + (2 * i) + 1] = 431 CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), DL, MVT::i32); 432 } 433 } 434 435 if (!IsRegSeq) 436 break; 437 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); 438 return; 439 } 440 case ISD::BUILD_PAIR: { 441 SDValue RC, SubReg0, SubReg1; 442 if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { 443 break; 444 } 445 SDLoc DL(N); 446 if (N->getValueType(0) == MVT::i128) { 447 RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); 448 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); 449 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); 450 } else if (N->getValueType(0) == MVT::i64) { 451 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); 452 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 453 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 454 } else { 455 llvm_unreachable("Unhandled value type for BUILD_PAIR"); 456 } 457 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, 458 N->getOperand(1), SubReg1 }; 459 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 460 N->getValueType(0), Ops)); 461 return; 462 } 463 464 case ISD::Constant: 465 case ISD::ConstantFP: { 466 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || 467 N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) 468 break; 469 470 uint64_t Imm; 471 if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) 472 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); 473 else { 474 ConstantSDNode *C = cast<ConstantSDNode>(N); 475 Imm = C->getZExtValue(); 476 } 477 478 SDLoc DL(N); 479 SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 480 CurDAG->getConstant(Imm & 0xFFFFFFFF, DL, 481 MVT::i32)); 482 SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 483 CurDAG->getConstant(Imm >> 32, DL, MVT::i32)); 484 const SDValue Ops[] = { 485 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 486 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), 487 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32) 488 }; 489 490 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 491 N->getValueType(0), Ops)); 492 return; 493 } 494 case ISD::LOAD: 495 case ISD::STORE: { 496 N = glueCopyToM0(N); 497 break; 498 } 499 500 case AMDGPUISD::BFE_I32: 501 case AMDGPUISD::BFE_U32: { 502 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 503 break; 504 505 // There is a scalar version available, but unlike the vector version which 506 // has a separate operand for the offset and width, the scalar version packs 507 // the width and offset into a single operand. Try to move to the scalar 508 // version if the offsets are constant, so that we can try to keep extended 509 // loads of kernel arguments in SGPRs. 510 511 // TODO: Technically we could try to pattern match scalar bitshifts of 512 // dynamic values, but it's probably not useful. 513 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 514 if (!Offset) 515 break; 516 517 ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 518 if (!Width) 519 break; 520 521 bool Signed = Opc == AMDGPUISD::BFE_I32; 522 523 uint32_t OffsetVal = Offset->getZExtValue(); 524 uint32_t WidthVal = Width->getZExtValue(); 525 526 ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, 527 SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); 528 return; 529 } 530 case AMDGPUISD::DIV_SCALE: { 531 SelectDIV_SCALE(N); 532 return; 533 } 534 case ISD::CopyToReg: { 535 const SITargetLowering& Lowering = 536 *static_cast<const SITargetLowering*>(getTargetLowering()); 537 Lowering.legalizeTargetIndependentNode(N, *CurDAG); 538 break; 539 } 540 case ISD::AND: 541 case ISD::SRL: 542 case ISD::SRA: 543 case ISD::SIGN_EXTEND_INREG: 544 if (N->getValueType(0) != MVT::i32 || 545 Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 546 break; 547 548 SelectS_BFE(N); 549 return; 550 case ISD::BRCOND: 551 SelectBRCOND(N); 552 return; 553 554 case AMDGPUISD::ATOMIC_CMP_SWAP: 555 SelectATOMIC_CMP_SWAP(N); 556 return; 557 } 558 559 SelectCode(N); 560 } 561 562 bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { 563 assert(AS != 0 && "Use checkPrivateAddress instead."); 564 if (!Ptr) 565 return false; 566 567 return Ptr->getType()->getPointerAddressSpace() == AS; 568 } 569 570 bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) { 571 if (Op->getPseudoValue()) 572 return true; 573 574 if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType())) 575 return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; 576 577 return false; 578 } 579 580 bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) { 581 if (!N->writeMem()) 582 return false; 583 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 584 } 585 586 bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { 587 const Value *MemVal = N->getMemOperand()->getValue(); 588 return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 589 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 590 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS)); 591 } 592 593 bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { 594 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 595 } 596 597 bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) { 598 if (!N->writeMem()) 599 return false; 600 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 601 } 602 603 bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { 604 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 605 } 606 607 bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { 608 if (!N->readMem()) 609 return false; 610 const Value *MemVal = N->getMemOperand()->getValue(); 611 if (CbId == -1) 612 return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS); 613 614 return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId); 615 } 616 617 bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const { 618 if (!N->readMem()) 619 return false; 620 if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) { 621 if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 622 return !isa<GlobalValue>( 623 GetUnderlyingObject(N->getMemOperand()->getValue(), 624 CurDAG->getDataLayout())); 625 626 //TODO: Why do we need this? 627 if (N->getMemoryVT().bitsLT(MVT::i32)) 628 return true; 629 } 630 631 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); 632 } 633 634 bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { 635 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS); 636 } 637 638 bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { 639 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); 640 } 641 642 bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const { 643 if (!N->readMem()) 644 return false; 645 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS); 646 } 647 648 bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { 649 return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); 650 } 651 652 bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { 653 MachineMemOperand *MMO = N->getMemOperand(); 654 if (checkPrivateAddress(N->getMemOperand())) { 655 if (MMO) { 656 const PseudoSourceValue *PSV = MMO->getPseudoValue(); 657 if (PSV && PSV->isConstantPool()) { 658 return true; 659 } 660 } 661 } 662 return false; 663 } 664 665 bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { 666 if (checkPrivateAddress(N->getMemOperand())) { 667 // Check to make sure we are not a constant pool load or a constant load 668 // that is marked as a private load 669 if (isCPLoad(N) || isConstantLoad(N, -1)) { 670 return false; 671 } 672 } 673 674 const Value *MemVal = N->getMemOperand()->getValue(); 675 return !checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && 676 !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && 677 !checkType(MemVal, AMDGPUAS::FLAT_ADDRESS) && 678 !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) && 679 !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) && 680 !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) && 681 !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS); 682 } 683 684 bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { 685 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 686 const Instruction *Term = BB->getTerminator(); 687 return Term->getMetadata("amdgpu.uniform") || 688 Term->getMetadata("structurizecfg.uniform"); 689 } 690 691 const char *AMDGPUDAGToDAGISel::getPassName() const { 692 return "AMDGPU DAG->DAG Pattern Instruction Selection"; 693 } 694 695 //===----------------------------------------------------------------------===// 696 // Complex Patterns 697 //===----------------------------------------------------------------------===// 698 699 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, 700 SDValue& IntPtr) { 701 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { 702 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), 703 true); 704 return true; 705 } 706 return false; 707 } 708 709 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, 710 SDValue& BaseReg, SDValue &Offset) { 711 if (!isa<ConstantSDNode>(Addr)) { 712 BaseReg = Addr; 713 Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); 714 return true; 715 } 716 return false; 717 } 718 719 bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, 720 SDValue &Offset) { 721 ConstantSDNode *IMMOffset; 722 723 if (Addr.getOpcode() == ISD::ADD 724 && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) 725 && isInt<16>(IMMOffset->getZExtValue())) { 726 727 Base = Addr.getOperand(0); 728 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 729 MVT::i32); 730 return true; 731 // If the pointer address is constant, we can move it to the offset field. 732 } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) 733 && isInt<16>(IMMOffset->getZExtValue())) { 734 Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), 735 SDLoc(CurDAG->getEntryNode()), 736 AMDGPU::ZERO, MVT::i32); 737 Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), 738 MVT::i32); 739 return true; 740 } 741 742 // Default case, no offset 743 Base = Addr; 744 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); 745 return true; 746 } 747 748 bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, 749 SDValue &Offset) { 750 ConstantSDNode *C; 751 SDLoc DL(Addr); 752 753 if ((C = dyn_cast<ConstantSDNode>(Addr))) { 754 Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32); 755 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 756 } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && 757 (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { 758 Base = Addr.getOperand(0); 759 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); 760 } else { 761 Base = Addr; 762 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); 763 } 764 765 return true; 766 } 767 768 void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { 769 SDLoc DL(N); 770 SDValue LHS = N->getOperand(0); 771 SDValue RHS = N->getOperand(1); 772 773 bool IsAdd = (N->getOpcode() == ISD::ADD); 774 775 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); 776 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); 777 778 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 779 DL, MVT::i32, LHS, Sub0); 780 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 781 DL, MVT::i32, LHS, Sub1); 782 783 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 784 DL, MVT::i32, RHS, Sub0); 785 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 786 DL, MVT::i32, RHS, Sub1); 787 788 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); 789 SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; 790 791 792 unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 793 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 794 795 SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs); 796 SDValue Carry(AddLo, 1); 797 SDNode *AddHi 798 = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32, 799 SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); 800 801 SDValue Args[5] = { 802 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), 803 SDValue(AddLo,0), 804 Sub0, 805 SDValue(AddHi,0), 806 Sub1, 807 }; 808 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); 809 } 810 811 // We need to handle this here because tablegen doesn't support matching 812 // instructions with multiple outputs. 813 void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { 814 SDLoc SL(N); 815 EVT VT = N->getValueType(0); 816 817 assert(VT == MVT::f32 || VT == MVT::f64); 818 819 unsigned Opc 820 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; 821 822 // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, 823 // omod 824 SDValue Ops[8]; 825 826 SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); 827 SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); 828 SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); 829 CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops); 830 } 831 832 bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, 833 unsigned OffsetBits) const { 834 if ((OffsetBits == 16 && !isUInt<16>(Offset)) || 835 (OffsetBits == 8 && !isUInt<8>(Offset))) 836 return false; 837 838 if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || 839 Subtarget->unsafeDSOffsetFoldingEnabled()) 840 return true; 841 842 // On Southern Islands instruction with a negative base value and an offset 843 // don't seem to work. 844 return CurDAG->SignBitIsZero(Base); 845 } 846 847 bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, 848 SDValue &Offset) const { 849 SDLoc DL(Addr); 850 if (CurDAG->isBaseWithConstantOffset(Addr)) { 851 SDValue N0 = Addr.getOperand(0); 852 SDValue N1 = Addr.getOperand(1); 853 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 854 if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { 855 // (add n0, c0) 856 Base = N0; 857 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 858 return true; 859 } 860 } else if (Addr.getOpcode() == ISD::SUB) { 861 // sub C, x -> add (sub 0, x), C 862 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 863 int64_t ByteOffset = C->getSExtValue(); 864 if (isUInt<16>(ByteOffset)) { 865 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 866 867 // XXX - This is kind of hacky. Create a dummy sub node so we can check 868 // the known bits in isDSOffsetLegal. We need to emit the selected node 869 // here, so this is thrown away. 870 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 871 Zero, Addr.getOperand(1)); 872 873 if (isDSOffsetLegal(Sub, ByteOffset, 16)) { 874 MachineSDNode *MachineSub 875 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 876 Zero, Addr.getOperand(1)); 877 878 Base = SDValue(MachineSub, 0); 879 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); 880 return true; 881 } 882 } 883 } 884 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 885 // If we have a constant address, prefer to put the constant into the 886 // offset. This can save moves to load the constant address since multiple 887 // operations can share the zero base address register, and enables merging 888 // into read2 / write2 instructions. 889 890 SDLoc DL(Addr); 891 892 if (isUInt<16>(CAddr->getZExtValue())) { 893 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 894 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 895 DL, MVT::i32, Zero); 896 Base = SDValue(MovZero, 0); 897 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); 898 return true; 899 } 900 } 901 902 // default case 903 Base = Addr; 904 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); 905 return true; 906 } 907 908 // TODO: If offset is too big, put low 16-bit into offset. 909 bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, 910 SDValue &Offset0, 911 SDValue &Offset1) const { 912 SDLoc DL(Addr); 913 914 if (CurDAG->isBaseWithConstantOffset(Addr)) { 915 SDValue N0 = Addr.getOperand(0); 916 SDValue N1 = Addr.getOperand(1); 917 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 918 unsigned DWordOffset0 = C1->getZExtValue() / 4; 919 unsigned DWordOffset1 = DWordOffset0 + 1; 920 // (add n0, c0) 921 if (isDSOffsetLegal(N0, DWordOffset1, 8)) { 922 Base = N0; 923 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 924 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 925 return true; 926 } 927 } else if (Addr.getOpcode() == ISD::SUB) { 928 // sub C, x -> add (sub 0, x), C 929 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { 930 unsigned DWordOffset0 = C->getZExtValue() / 4; 931 unsigned DWordOffset1 = DWordOffset0 + 1; 932 933 if (isUInt<8>(DWordOffset0)) { 934 SDLoc DL(Addr); 935 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 936 937 // XXX - This is kind of hacky. Create a dummy sub node so we can check 938 // the known bits in isDSOffsetLegal. We need to emit the selected node 939 // here, so this is thrown away. 940 SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, 941 Zero, Addr.getOperand(1)); 942 943 if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { 944 MachineSDNode *MachineSub 945 = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, 946 Zero, Addr.getOperand(1)); 947 948 Base = SDValue(MachineSub, 0); 949 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 950 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 951 return true; 952 } 953 } 954 } 955 } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { 956 unsigned DWordOffset0 = CAddr->getZExtValue() / 4; 957 unsigned DWordOffset1 = DWordOffset0 + 1; 958 assert(4 * DWordOffset0 == CAddr->getZExtValue()); 959 960 if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { 961 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); 962 MachineSDNode *MovZero 963 = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 964 DL, MVT::i32, Zero); 965 Base = SDValue(MovZero, 0); 966 Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); 967 Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); 968 return true; 969 } 970 } 971 972 // default case 973 Base = Addr; 974 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); 975 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); 976 return true; 977 } 978 979 static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { 980 return isUInt<12>(Imm->getZExtValue()); 981 } 982 983 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, 984 SDValue &VAddr, SDValue &SOffset, 985 SDValue &Offset, SDValue &Offen, 986 SDValue &Idxen, SDValue &Addr64, 987 SDValue &GLC, SDValue &SLC, 988 SDValue &TFE) const { 989 // Subtarget prefers to use flat instruction 990 if (Subtarget->useFlatForGlobal()) 991 return false; 992 993 SDLoc DL(Addr); 994 995 if (!GLC.getNode()) 996 GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 997 if (!SLC.getNode()) 998 SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); 999 TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); 1000 1001 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); 1002 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); 1003 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); 1004 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1005 1006 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1007 SDValue N0 = Addr.getOperand(0); 1008 SDValue N1 = Addr.getOperand(1); 1009 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1010 1011 if (N0.getOpcode() == ISD::ADD) { 1012 // (add (add N2, N3), C1) -> addr64 1013 SDValue N2 = N0.getOperand(0); 1014 SDValue N3 = N0.getOperand(1); 1015 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1016 Ptr = N2; 1017 VAddr = N3; 1018 } else { 1019 1020 // (add N0, C1) -> offset 1021 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1022 Ptr = N0; 1023 } 1024 1025 if (isLegalMUBUFImmOffset(C1)) { 1026 Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1027 return true; 1028 } 1029 1030 if (isUInt<32>(C1->getZExtValue())) { 1031 // Illegal offset, store it in soffset. 1032 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1033 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1034 CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 1035 0); 1036 return true; 1037 } 1038 } 1039 1040 if (Addr.getOpcode() == ISD::ADD) { 1041 // (add N0, N1) -> addr64 1042 SDValue N0 = Addr.getOperand(0); 1043 SDValue N1 = Addr.getOperand(1); 1044 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); 1045 Ptr = N0; 1046 VAddr = N1; 1047 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1048 return true; 1049 } 1050 1051 // default case -> offset 1052 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); 1053 Ptr = Addr; 1054 Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1055 1056 return true; 1057 } 1058 1059 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1060 SDValue &VAddr, SDValue &SOffset, 1061 SDValue &Offset, SDValue &GLC, 1062 SDValue &SLC, SDValue &TFE) const { 1063 SDValue Ptr, Offen, Idxen, Addr64; 1064 1065 // addr64 bit was removed for volcanic islands. 1066 if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 1067 return false; 1068 1069 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1070 GLC, SLC, TFE)) 1071 return false; 1072 1073 ConstantSDNode *C = cast<ConstantSDNode>(Addr64); 1074 if (C->getSExtValue()) { 1075 SDLoc DL(Addr); 1076 1077 const SITargetLowering& Lowering = 1078 *static_cast<const SITargetLowering*>(getTargetLowering()); 1079 1080 SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); 1081 return true; 1082 } 1083 1084 return false; 1085 } 1086 1087 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, 1088 SDValue &VAddr, SDValue &SOffset, 1089 SDValue &Offset, 1090 SDValue &SLC) const { 1091 SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); 1092 SDValue GLC, TFE; 1093 1094 return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); 1095 } 1096 1097 bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, 1098 SDValue &VAddr, SDValue &SOffset, 1099 SDValue &ImmOffset) const { 1100 1101 SDLoc DL(Addr); 1102 MachineFunction &MF = CurDAG->getMachineFunction(); 1103 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 1104 1105 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); 1106 SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); 1107 1108 // (add n0, c1) 1109 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1110 SDValue N0 = Addr.getOperand(0); 1111 SDValue N1 = Addr.getOperand(1); 1112 1113 // Offsets in vaddr must be positive. 1114 ConstantSDNode *C1 = cast<ConstantSDNode>(N1); 1115 if (isLegalMUBUFImmOffset(C1)) { 1116 VAddr = N0; 1117 ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); 1118 return true; 1119 } 1120 } 1121 1122 // (node) 1123 VAddr = Addr; 1124 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1125 return true; 1126 } 1127 1128 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1129 SDValue &SOffset, SDValue &Offset, 1130 SDValue &GLC, SDValue &SLC, 1131 SDValue &TFE) const { 1132 SDValue Ptr, VAddr, Offen, Idxen, Addr64; 1133 const SIInstrInfo *TII = 1134 static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); 1135 1136 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, 1137 GLC, SLC, TFE)) 1138 return false; 1139 1140 if (!cast<ConstantSDNode>(Offen)->getSExtValue() && 1141 !cast<ConstantSDNode>(Idxen)->getSExtValue() && 1142 !cast<ConstantSDNode>(Addr64)->getSExtValue()) { 1143 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | 1144 APInt::getAllOnesValue(32).getZExtValue(); // Size 1145 SDLoc DL(Addr); 1146 1147 const SITargetLowering& Lowering = 1148 *static_cast<const SITargetLowering*>(getTargetLowering()); 1149 1150 SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); 1151 return true; 1152 } 1153 return false; 1154 } 1155 1156 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1157 SDValue &Soffset, SDValue &Offset 1158 ) const { 1159 SDValue GLC, SLC, TFE; 1160 1161 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1162 } 1163 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, 1164 SDValue &Soffset, SDValue &Offset, 1165 SDValue &SLC) const { 1166 SDValue GLC, TFE; 1167 1168 return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); 1169 } 1170 1171 void AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant, 1172 SDValue &SOffset, 1173 SDValue &ImmOffset) const { 1174 SDLoc DL(Constant); 1175 uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue(); 1176 uint32_t Overflow = 0; 1177 1178 if (Imm >= 4096) { 1179 if (Imm <= 4095 + 64) { 1180 // Use an SOffset inline constant for 1..64 1181 Overflow = Imm - 4095; 1182 Imm = 4095; 1183 } else { 1184 // Try to keep the same value in SOffset for adjacent loads, so that 1185 // the corresponding register contents can be re-used. 1186 // 1187 // Load values with all low-bits set into SOffset, so that a larger 1188 // range of values can be covered using s_movk_i32 1189 uint32_t High = (Imm + 1) & ~4095; 1190 uint32_t Low = (Imm + 1) & 4095; 1191 Imm = Low; 1192 Overflow = High - 1; 1193 } 1194 } 1195 1196 ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16); 1197 1198 if (Overflow <= 64) 1199 SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32); 1200 else 1201 SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, 1202 CurDAG->getTargetConstant(Overflow, DL, MVT::i32)), 1203 0); 1204 } 1205 1206 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset, 1207 SDValue &SOffset, 1208 SDValue &ImmOffset) const { 1209 SDLoc DL(Offset); 1210 1211 if (!isa<ConstantSDNode>(Offset)) 1212 return false; 1213 1214 SelectMUBUFConstant(Offset, SOffset, ImmOffset); 1215 1216 return true; 1217 } 1218 1219 bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, 1220 SDValue &SOffset, 1221 SDValue &ImmOffset, 1222 SDValue &VOffset) const { 1223 SDLoc DL(Offset); 1224 1225 // Don't generate an unnecessary voffset for constant offsets. 1226 if (isa<ConstantSDNode>(Offset)) 1227 return false; 1228 1229 if (CurDAG->isBaseWithConstantOffset(Offset)) { 1230 SDValue N0 = Offset.getOperand(0); 1231 SDValue N1 = Offset.getOperand(1); 1232 SelectMUBUFConstant(N1, SOffset, ImmOffset); 1233 VOffset = N0; 1234 } else { 1235 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); 1236 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); 1237 VOffset = Offset; 1238 } 1239 1240 return true; 1241 } 1242 1243 bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, 1244 SDValue &VAddr, 1245 SDValue &SLC, 1246 SDValue &TFE) const { 1247 VAddr = Addr; 1248 TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); 1249 return true; 1250 } 1251 1252 /// 1253 /// \param EncodedOffset This is the immediate value that will be encoded 1254 /// directly into the instruction. On SI/CI the \p EncodedOffset 1255 /// will be in units of dwords and on VI+ it will be units of bytes. 1256 static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, 1257 int64_t EncodedOffset) { 1258 return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1259 isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); 1260 } 1261 1262 bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, 1263 SDValue &Offset, bool &Imm) const { 1264 1265 // FIXME: Handle non-constant offsets. 1266 ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); 1267 if (!C) 1268 return false; 1269 1270 SDLoc SL(ByteOffsetNode); 1271 AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); 1272 int64_t ByteOffset = C->getSExtValue(); 1273 int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? 1274 ByteOffset >> 2 : ByteOffset; 1275 1276 if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { 1277 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1278 Imm = true; 1279 return true; 1280 } 1281 1282 if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) 1283 return false; 1284 1285 if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { 1286 // 32-bit Immediates are supported on Sea Islands. 1287 Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); 1288 } else { 1289 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); 1290 Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, 1291 C32Bit), 0); 1292 } 1293 Imm = false; 1294 return true; 1295 } 1296 1297 bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, 1298 SDValue &Offset, bool &Imm) const { 1299 1300 SDLoc SL(Addr); 1301 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1302 SDValue N0 = Addr.getOperand(0); 1303 SDValue N1 = Addr.getOperand(1); 1304 1305 if (SelectSMRDOffset(N1, Offset, Imm)) { 1306 SBase = N0; 1307 return true; 1308 } 1309 } 1310 SBase = Addr; 1311 Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); 1312 Imm = true; 1313 return true; 1314 } 1315 1316 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, 1317 SDValue &Offset) const { 1318 bool Imm; 1319 return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; 1320 } 1321 1322 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, 1323 SDValue &Offset) const { 1324 1325 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1326 return false; 1327 1328 bool Imm; 1329 if (!SelectSMRD(Addr, SBase, Offset, Imm)) 1330 return false; 1331 1332 return !Imm && isa<ConstantSDNode>(Offset); 1333 } 1334 1335 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, 1336 SDValue &Offset) const { 1337 bool Imm; 1338 return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && 1339 !isa<ConstantSDNode>(Offset); 1340 } 1341 1342 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, 1343 SDValue &Offset) const { 1344 bool Imm; 1345 return SelectSMRDOffset(Addr, Offset, Imm) && Imm; 1346 } 1347 1348 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, 1349 SDValue &Offset) const { 1350 if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) 1351 return false; 1352 1353 bool Imm; 1354 if (!SelectSMRDOffset(Addr, Offset, Imm)) 1355 return false; 1356 1357 return !Imm && isa<ConstantSDNode>(Offset); 1358 } 1359 1360 bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, 1361 SDValue &Offset) const { 1362 bool Imm; 1363 return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && 1364 !isa<ConstantSDNode>(Offset); 1365 } 1366 1367 SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, 1368 SDValue Val, uint32_t Offset, 1369 uint32_t Width) { 1370 // Transformation function, pack the offset and width of a BFE into 1371 // the format expected by the S_BFE_I32 / S_BFE_U32. In the second 1372 // source, bits [5:0] contain the offset and bits [22:16] the width. 1373 uint32_t PackedVal = Offset | (Width << 16); 1374 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); 1375 1376 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); 1377 } 1378 1379 void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { 1380 // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) 1381 // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) 1382 // Predicate: 0 < b <= c < 32 1383 1384 const SDValue &Shl = N->getOperand(0); 1385 ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); 1386 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1387 1388 if (B && C) { 1389 uint32_t BVal = B->getZExtValue(); 1390 uint32_t CVal = C->getZExtValue(); 1391 1392 if (0 < BVal && BVal <= CVal && CVal < 32) { 1393 bool Signed = N->getOpcode() == ISD::SRA; 1394 unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 1395 1396 ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, 1397 32 - CVal)); 1398 return; 1399 } 1400 } 1401 SelectCode(N); 1402 } 1403 1404 void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { 1405 switch (N->getOpcode()) { 1406 case ISD::AND: 1407 if (N->getOperand(0).getOpcode() == ISD::SRL) { 1408 // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" 1409 // Predicate: isMask(mask) 1410 const SDValue &Srl = N->getOperand(0); 1411 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); 1412 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1413 1414 if (Shift && Mask) { 1415 uint32_t ShiftVal = Shift->getZExtValue(); 1416 uint32_t MaskVal = Mask->getZExtValue(); 1417 1418 if (isMask_32(MaskVal)) { 1419 uint32_t WidthVal = countPopulation(MaskVal); 1420 1421 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1422 Srl.getOperand(0), ShiftVal, WidthVal)); 1423 return; 1424 } 1425 } 1426 } 1427 break; 1428 case ISD::SRL: 1429 if (N->getOperand(0).getOpcode() == ISD::AND) { 1430 // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" 1431 // Predicate: isMask(mask >> b) 1432 const SDValue &And = N->getOperand(0); 1433 ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); 1434 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); 1435 1436 if (Shift && Mask) { 1437 uint32_t ShiftVal = Shift->getZExtValue(); 1438 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; 1439 1440 if (isMask_32(MaskVal)) { 1441 uint32_t WidthVal = countPopulation(MaskVal); 1442 1443 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), 1444 And.getOperand(0), ShiftVal, WidthVal)); 1445 return; 1446 } 1447 } 1448 } else if (N->getOperand(0).getOpcode() == ISD::SHL) { 1449 SelectS_BFEFromShifts(N); 1450 return; 1451 } 1452 break; 1453 case ISD::SRA: 1454 if (N->getOperand(0).getOpcode() == ISD::SHL) { 1455 SelectS_BFEFromShifts(N); 1456 return; 1457 } 1458 break; 1459 1460 case ISD::SIGN_EXTEND_INREG: { 1461 // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 1462 SDValue Src = N->getOperand(0); 1463 if (Src.getOpcode() != ISD::SRL) 1464 break; 1465 1466 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); 1467 if (!Amt) 1468 break; 1469 1470 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1471 ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), 1472 Amt->getZExtValue(), Width)); 1473 return; 1474 } 1475 } 1476 1477 SelectCode(N); 1478 } 1479 1480 void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { 1481 SDValue Cond = N->getOperand(1); 1482 1483 if (isCBranchSCC(N)) { 1484 // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it. 1485 SelectCode(N); 1486 return; 1487 } 1488 1489 // The result of VOPC instructions is or'd against ~EXEC before it is 1490 // written to vcc or another SGPR. This means that the value '1' is always 1491 // written to the corresponding bit for results that are masked. In order 1492 // to correctly check against vccz, we need to and VCC with the EXEC 1493 // register in order to clear the value from the masked bits. 1494 1495 SDLoc SL(N); 1496 1497 SDNode *MaskedCond = 1498 CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1, 1499 CurDAG->getRegister(AMDGPU::EXEC, MVT::i1), 1500 Cond); 1501 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, 1502 SDValue(MaskedCond, 0), 1503 SDValue()); // Passing SDValue() adds a 1504 // glue output. 1505 CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other, 1506 N->getOperand(2), // Basic Block 1507 VCC.getValue(0), // Chain 1508 VCC.getValue(1)); // Glue 1509 return; 1510 } 1511 1512 // This is here because there isn't a way to use the generated sub0_sub1 as the 1513 // subreg index to EXTRACT_SUBREG in tablegen. 1514 void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { 1515 MemSDNode *Mem = cast<MemSDNode>(N); 1516 unsigned AS = Mem->getAddressSpace(); 1517 if (AS == AMDGPUAS::FLAT_ADDRESS) { 1518 SelectCode(N); 1519 return; 1520 } 1521 1522 MVT VT = N->getSimpleValueType(0); 1523 bool Is32 = (VT == MVT::i32); 1524 SDLoc SL(N); 1525 1526 MachineSDNode *CmpSwap = nullptr; 1527 if (Subtarget->hasAddr64()) { 1528 SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC; 1529 1530 if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { 1531 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_ADDR64 : 1532 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_ADDR64; 1533 SDValue CmpVal = Mem->getOperand(2); 1534 1535 // XXX - Do we care about glue operands? 1536 1537 SDValue Ops[] = { 1538 CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1539 }; 1540 1541 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1542 } 1543 } 1544 1545 if (!CmpSwap) { 1546 SDValue SRsrc, SOffset, Offset, SLC; 1547 if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { 1548 unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET : 1549 AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_RTN_OFFSET; 1550 1551 SDValue CmpVal = Mem->getOperand(2); 1552 SDValue Ops[] = { 1553 CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() 1554 }; 1555 1556 CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); 1557 } 1558 } 1559 1560 if (!CmpSwap) { 1561 SelectCode(N); 1562 return; 1563 } 1564 1565 MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1); 1566 *MMOs = Mem->getMemOperand(); 1567 CmpSwap->setMemRefs(MMOs, MMOs + 1); 1568 1569 unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1; 1570 SDValue Extract 1571 = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); 1572 1573 ReplaceUses(SDValue(N, 0), Extract); 1574 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); 1575 CurDAG->RemoveDeadNode(N); 1576 } 1577 1578 bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, 1579 SDValue &SrcMods) const { 1580 1581 unsigned Mods = 0; 1582 1583 Src = In; 1584 1585 if (Src.getOpcode() == ISD::FNEG) { 1586 Mods |= SISrcMods::NEG; 1587 Src = Src.getOperand(0); 1588 } 1589 1590 if (Src.getOpcode() == ISD::FABS) { 1591 Mods |= SISrcMods::ABS; 1592 Src = Src.getOperand(0); 1593 } 1594 1595 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); 1596 1597 return true; 1598 } 1599 1600 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, 1601 SDValue &SrcMods) const { 1602 bool Res = SelectVOP3Mods(In, Src, SrcMods); 1603 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); 1604 } 1605 1606 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, 1607 SDValue &SrcMods, SDValue &Clamp, 1608 SDValue &Omod) const { 1609 SDLoc DL(In); 1610 // FIXME: Handle Clamp and Omod 1611 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); 1612 Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); 1613 1614 return SelectVOP3Mods(In, Src, SrcMods); 1615 } 1616 1617 bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, 1618 SDValue &SrcMods, SDValue &Clamp, 1619 SDValue &Omod) const { 1620 bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); 1621 1622 return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && 1623 cast<ConstantSDNode>(Clamp)->isNullValue() && 1624 cast<ConstantSDNode>(Omod)->isNullValue(); 1625 } 1626 1627 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, 1628 SDValue &SrcMods, 1629 SDValue &Omod) const { 1630 // FIXME: Handle Omod 1631 Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1632 1633 return SelectVOP3Mods(In, Src, SrcMods); 1634 } 1635 1636 bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, 1637 SDValue &SrcMods, 1638 SDValue &Clamp, 1639 SDValue &Omod) const { 1640 Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); 1641 return SelectVOP3Mods(In, Src, SrcMods); 1642 } 1643 1644 void AMDGPUDAGToDAGISel::PreprocessISelDAG() { 1645 MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo(); 1646 1647 // Handle the perverse case where a frame index is being stored. We don't 1648 // want to see multiple frame index operands on the same instruction since 1649 // it complicates things and violates some assumptions about frame index 1650 // lowering. 1651 for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd(); 1652 I != E; ++I) { 1653 SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32); 1654 1655 // It's possible that we have a frame index defined in the function that 1656 // isn't used in this block. 1657 if (FI.use_empty()) 1658 continue; 1659 1660 // Skip over the AssertZext inserted during lowering. 1661 SDValue EffectiveFI = FI; 1662 auto It = FI->use_begin(); 1663 if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) { 1664 EffectiveFI = SDValue(*It, 0); 1665 It = EffectiveFI->use_begin(); 1666 } 1667 1668 for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) { 1669 SDUse &Use = It.getUse(); 1670 SDNode *User = Use.getUser(); 1671 unsigned OpIdx = It.getOperandNo(); 1672 ++It; 1673 1674 if (MemSDNode *M = dyn_cast<MemSDNode>(User)) { 1675 unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1; 1676 if (OpIdx == PtrIdx) 1677 continue; 1678 1679 unsigned OpN = M->getNumOperands(); 1680 SDValue NewOps[8]; 1681 1682 assert(OpN < array_lengthof(NewOps)); 1683 for (unsigned Op = 0; Op != OpN; ++Op) { 1684 if (Op != OpIdx) { 1685 NewOps[Op] = M->getOperand(Op); 1686 continue; 1687 } 1688 1689 MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, 1690 SDLoc(M), MVT::i32, FI); 1691 NewOps[Op] = SDValue(Mov, 0); 1692 } 1693 1694 CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN)); 1695 } 1696 } 1697 } 1698 } 1699 1700 void AMDGPUDAGToDAGISel::PostprocessISelDAG() { 1701 const AMDGPUTargetLowering& Lowering = 1702 *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); 1703 bool IsModified = false; 1704 do { 1705 IsModified = false; 1706 // Go over all selected nodes and try to fold them a bit more 1707 for (SDNode &Node : CurDAG->allnodes()) { 1708 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 1709 if (!MachineNode) 1710 continue; 1711 1712 SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); 1713 if (ResNode != &Node) { 1714 ReplaceUses(&Node, ResNode); 1715 IsModified = true; 1716 } 1717 } 1718 CurDAG->RemoveDeadNodes(); 1719 } while (IsModified); 1720 } 1721