1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief SI implementation of the TargetRegisterInfo class. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "SIRegisterInfo.h" 16 #include "AMDGPUSubtarget.h" 17 #include "SIInstrInfo.h" 18 #include "SIMachineFunctionInfo.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/RegisterScavenging.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/LLVMContext.h" 24 25 using namespace llvm; 26 27 static bool hasPressureSet(const int *PSets, unsigned PSetID) { 28 for (unsigned i = 0; PSets[i] != -1; ++i) { 29 if (PSets[i] == (int)PSetID) 30 return true; 31 } 32 return false; 33 } 34 35 void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg, 36 BitVector &PressureSets) const { 37 for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) { 38 const int *PSets = getRegUnitPressureSets(*U); 39 if (hasPressureSet(PSets, PSetID)) { 40 PressureSets.set(PSetID); 41 break; 42 } 43 } 44 } 45 46 static cl::opt<bool> EnableSpillSGPRToSMEM( 47 "amdgpu-spill-sgpr-to-smem", 48 cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"), 49 cl::init(false)); 50 51 static cl::opt<bool> EnableSpillSGPRToVGPR( 52 "amdgpu-spill-sgpr-to-vgpr", 53 cl::desc("Enable spilling VGPRs to SGPRs"), 54 cl::ReallyHidden, 55 cl::init(true)); 56 57 SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) : 58 AMDGPURegisterInfo(), 59 SGPRPressureSets(getNumRegPressureSets()), 60 VGPRPressureSets(getNumRegPressureSets()), 61 SpillSGPRToVGPR(false), 62 SpillSGPRToSMEM(false) { 63 if 
(EnableSpillSGPRToSMEM && ST.hasScalarStores()) 64 SpillSGPRToSMEM = true; 65 else if (EnableSpillSGPRToVGPR) 66 SpillSGPRToVGPR = true; 67 68 unsigned NumRegPressureSets = getNumRegPressureSets(); 69 70 SGPRSetID = NumRegPressureSets; 71 VGPRSetID = NumRegPressureSets; 72 73 for (unsigned i = 0; i < NumRegPressureSets; ++i) { 74 classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets); 75 classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets); 76 } 77 78 // Determine the number of reg units for each pressure set. 79 std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0); 80 for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) { 81 const int *PSets = getRegUnitPressureSets(i); 82 for (unsigned j = 0; PSets[j] != -1; ++j) { 83 ++PressureSetRegUnits[PSets[j]]; 84 } 85 } 86 87 unsigned VGPRMax = 0, SGPRMax = 0; 88 for (unsigned i = 0; i < NumRegPressureSets; ++i) { 89 if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) { 90 VGPRSetID = i; 91 VGPRMax = PressureSetRegUnits[i]; 92 continue; 93 } 94 if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) { 95 SGPRSetID = i; 96 SGPRMax = PressureSetRegUnits[i]; 97 } 98 } 99 100 assert(SGPRSetID < NumRegPressureSets && 101 VGPRSetID < NumRegPressureSets); 102 } 103 104 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { 105 MCRegAliasIterator R(Reg, this, true); 106 107 for (; R.isValid(); ++R) 108 Reserved.set(*R); 109 } 110 111 unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( 112 const MachineFunction &MF) const { 113 114 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 115 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; 116 unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); 117 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); 118 } 119 120 static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { 121 unsigned Reg; 122 123 // Try to place it in a hole after 
PrivateSegmentBufferReg. 124 if (RegCount & 3) { 125 // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to 126 // alignment constraints, so we have a hole where can put the wave offset. 127 Reg = RegCount - 1; 128 } else { 129 // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the 130 // wave offset before it. 131 Reg = RegCount - 5; 132 } 133 134 return Reg; 135 } 136 137 unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( 138 const MachineFunction &MF) const { 139 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 140 unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); 141 return AMDGPU::SGPR_32RegClass.getRegister(Reg); 142 } 143 144 unsigned SIRegisterInfo::reservedStackPtrOffsetReg( 145 const MachineFunction &MF) const { 146 return AMDGPU::SGPR32; 147 } 148 149 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { 150 BitVector Reserved(getNumRegs()); 151 Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); 152 153 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but 154 // this seems likely to result in bugs, so I'm marking them as reserved. 155 reserveRegisterTuples(Reserved, AMDGPU::EXEC); 156 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); 157 158 // M0 has to be reserved so that llvm accepts it as a live-in into a block. 159 reserveRegisterTuples(Reserved, AMDGPU::M0); 160 161 // Reserve the memory aperture registers. 162 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); 163 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); 164 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); 165 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); 166 167 // Reserve Trap Handler registers - support is not implemented in Codegen. 
168 reserveRegisterTuples(Reserved, AMDGPU::TBA); 169 reserveRegisterTuples(Reserved, AMDGPU::TMA); 170 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1); 171 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3); 172 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5); 173 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7); 174 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); 175 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); 176 177 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 178 179 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); 180 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); 181 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { 182 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); 183 reserveRegisterTuples(Reserved, Reg); 184 } 185 186 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); 187 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); 188 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { 189 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); 190 reserveRegisterTuples(Reserved, Reg); 191 } 192 193 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 194 195 unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); 196 if (ScratchWaveOffsetReg != AMDGPU::NoRegister) { 197 // Reserve 1 SGPR for scratch wave offset in case we need to spill. 198 reserveRegisterTuples(Reserved, ScratchWaveOffsetReg); 199 } 200 201 unsigned ScratchRSrcReg = MFI->getScratchRSrcReg(); 202 if (ScratchRSrcReg != AMDGPU::NoRegister) { 203 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need 204 // to spill. 205 // TODO: May need to reserve a VGPR if doing LDS spilling. 
206 reserveRegisterTuples(Reserved, ScratchRSrcReg); 207 assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); 208 } 209 210 unsigned StackPtrReg = MFI->getStackPtrOffsetReg(); 211 if (StackPtrReg != AMDGPU::NoRegister) { 212 reserveRegisterTuples(Reserved, StackPtrReg); 213 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); 214 } 215 216 unsigned FrameReg = MFI->getFrameOffsetReg(); 217 if (FrameReg != AMDGPU::NoRegister) { 218 reserveRegisterTuples(Reserved, FrameReg); 219 assert(!isSubRegister(ScratchRSrcReg, FrameReg)); 220 } 221 222 return Reserved; 223 } 224 225 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { 226 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>(); 227 if (Info->isEntryFunction()) { 228 const MachineFrameInfo &MFI = Fn.getFrameInfo(); 229 return MFI.hasStackObjects() || MFI.hasCalls(); 230 } 231 232 // May need scavenger for dealing with callee saved registers. 233 return true; 234 } 235 236 bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { 237 return MF.getFrameInfo().hasStackObjects(); 238 } 239 240 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( 241 const MachineFunction &MF) const { 242 // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't 243 // create a virtual register for it during frame index elimination, so the 244 // scavenger is directly needed. 245 return MF.getFrameInfo().hasStackObjects() && 246 MF.getSubtarget<SISubtarget>().hasScalarStores() && 247 MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs(); 248 } 249 250 bool SIRegisterInfo::requiresVirtualBaseRegisters( 251 const MachineFunction &) const { 252 // There are no special dedicated stack or frame pointers. 253 return true; 254 } 255 256 bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { 257 // This helps catch bugs as verifier errors. 
258 return true; 259 } 260 261 int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const { 262 assert(SIInstrInfo::isMUBUF(*MI)); 263 264 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 265 AMDGPU::OpName::offset); 266 return MI->getOperand(OffIdx).getImm(); 267 } 268 269 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI, 270 int Idx) const { 271 if (!SIInstrInfo::isMUBUF(*MI)) 272 return 0; 273 274 assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(), 275 AMDGPU::OpName::vaddr) && 276 "Should never see frame index on non-address operand"); 277 278 return getMUBUFInstrOffset(MI); 279 } 280 281 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { 282 if (!MI->mayLoadOrStore()) 283 return false; 284 285 int64_t FullOffset = Offset + getMUBUFInstrOffset(MI); 286 287 return !isUInt<12>(FullOffset); 288 } 289 290 void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, 291 unsigned BaseReg, 292 int FrameIdx, 293 int64_t Offset) const { 294 MachineBasicBlock::iterator Ins = MBB->begin(); 295 DebugLoc DL; // Defaults to "unknown" 296 297 if (Ins != MBB->end()) 298 DL = Ins->getDebugLoc(); 299 300 MachineFunction *MF = MBB->getParent(); 301 const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>(); 302 const SIInstrInfo *TII = Subtarget.getInstrInfo(); 303 304 if (Offset == 0) { 305 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg) 306 .addFrameIndex(FrameIdx); 307 return; 308 } 309 310 MachineRegisterInfo &MRI = MF->getRegInfo(); 311 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 312 313 unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 314 315 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) 316 .addImm(Offset); 317 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg) 318 .addFrameIndex(FrameIdx); 319 320 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg) 321 .addReg(OffsetReg, 
RegState::Kill) 322 .addReg(FIReg); 323 } 324 325 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, 326 int64_t Offset) const { 327 328 MachineBasicBlock *MBB = MI.getParent(); 329 MachineFunction *MF = MBB->getParent(); 330 const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>(); 331 const SIInstrInfo *TII = Subtarget.getInstrInfo(); 332 333 #ifndef NDEBUG 334 // FIXME: Is it possible to be storing a frame index to itself? 335 bool SeenFI = false; 336 for (const MachineOperand &MO: MI.operands()) { 337 if (MO.isFI()) { 338 if (SeenFI) 339 llvm_unreachable("should not see multiple frame indices"); 340 341 SeenFI = true; 342 } 343 } 344 #endif 345 346 MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); 347 assert(FIOp && FIOp->isFI() && "frame index must be address operand"); 348 assert(TII->isMUBUF(MI)); 349 assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == 350 MF->getInfo<SIMachineFunctionInfo>()->getFrameOffsetReg() && 351 "should only be seeing frame offset relative FrameIndex"); 352 353 354 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); 355 int64_t NewOffset = OffsetOp->getImm() + Offset; 356 assert(isUInt<12>(NewOffset) && "offset should be legal"); 357 358 FIOp->ChangeToRegister(BaseReg, false); 359 OffsetOp->setImm(NewOffset); 360 } 361 362 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, 363 unsigned BaseReg, 364 int64_t Offset) const { 365 if (!SIInstrInfo::isMUBUF(*MI)) 366 return false; 367 368 int64_t NewOffset = Offset + getMUBUFInstrOffset(MI); 369 370 return isUInt<12>(NewOffset); 371 } 372 373 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass( 374 const MachineFunction &MF, unsigned Kind) const { 375 // This is inaccurate. It depends on the instruction and address space. The 376 // only place where we should hit this is for dealing with frame indexes / 377 // private accesses, so this is correct in that case. 
378 return &AMDGPU::VGPR_32RegClass; 379 } 380 381 static unsigned getNumSubRegsForSpillOp(unsigned Op) { 382 383 switch (Op) { 384 case AMDGPU::SI_SPILL_S512_SAVE: 385 case AMDGPU::SI_SPILL_S512_RESTORE: 386 case AMDGPU::SI_SPILL_V512_SAVE: 387 case AMDGPU::SI_SPILL_V512_RESTORE: 388 return 16; 389 case AMDGPU::SI_SPILL_S256_SAVE: 390 case AMDGPU::SI_SPILL_S256_RESTORE: 391 case AMDGPU::SI_SPILL_V256_SAVE: 392 case AMDGPU::SI_SPILL_V256_RESTORE: 393 return 8; 394 case AMDGPU::SI_SPILL_S128_SAVE: 395 case AMDGPU::SI_SPILL_S128_RESTORE: 396 case AMDGPU::SI_SPILL_V128_SAVE: 397 case AMDGPU::SI_SPILL_V128_RESTORE: 398 return 4; 399 case AMDGPU::SI_SPILL_V96_SAVE: 400 case AMDGPU::SI_SPILL_V96_RESTORE: 401 return 3; 402 case AMDGPU::SI_SPILL_S64_SAVE: 403 case AMDGPU::SI_SPILL_S64_RESTORE: 404 case AMDGPU::SI_SPILL_V64_SAVE: 405 case AMDGPU::SI_SPILL_V64_RESTORE: 406 return 2; 407 case AMDGPU::SI_SPILL_S32_SAVE: 408 case AMDGPU::SI_SPILL_S32_RESTORE: 409 case AMDGPU::SI_SPILL_V32_SAVE: 410 case AMDGPU::SI_SPILL_V32_RESTORE: 411 return 1; 412 default: llvm_unreachable("Invalid spill opcode"); 413 } 414 } 415 416 static int getOffsetMUBUFStore(unsigned Opc) { 417 switch (Opc) { 418 case AMDGPU::BUFFER_STORE_DWORD_OFFEN: 419 return AMDGPU::BUFFER_STORE_DWORD_OFFSET; 420 case AMDGPU::BUFFER_STORE_BYTE_OFFEN: 421 return AMDGPU::BUFFER_STORE_BYTE_OFFSET; 422 case AMDGPU::BUFFER_STORE_SHORT_OFFEN: 423 return AMDGPU::BUFFER_STORE_SHORT_OFFSET; 424 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN: 425 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET; 426 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN: 427 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET; 428 default: 429 return -1; 430 } 431 } 432 433 static int getOffsetMUBUFLoad(unsigned Opc) { 434 switch (Opc) { 435 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: 436 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET; 437 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN: 438 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET; 439 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN: 440 return 
AMDGPU::BUFFER_LOAD_SBYTE_OFFSET; 441 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN: 442 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET; 443 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN: 444 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET; 445 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN: 446 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; 447 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN: 448 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET; 449 default: 450 return -1; 451 } 452 } 453 454 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not 455 // need to handle the case where an SGPR may need to be spilled while spilling. 456 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII, 457 MachineFrameInfo &MFI, 458 MachineBasicBlock::iterator MI, 459 int Index, 460 int64_t Offset) { 461 MachineBasicBlock *MBB = MI->getParent(); 462 const DebugLoc &DL = MI->getDebugLoc(); 463 bool IsStore = MI->mayStore(); 464 465 unsigned Opc = MI->getOpcode(); 466 int LoadStoreOp = IsStore ? 467 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc); 468 if (LoadStoreOp == -1) 469 return false; 470 471 unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg(); 472 473 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) 474 .addReg(Reg, getDefRegState(!IsStore)) 475 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)) 476 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)) 477 .addImm(Offset) 478 .addImm(0) // glc 479 .addImm(0) // slc 480 .addImm(0) // tfe 481 .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 482 return true; 483 } 484 485 void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, 486 unsigned LoadStoreOp, 487 int Index, 488 unsigned ValueReg, 489 bool IsKill, 490 unsigned ScratchRsrcReg, 491 unsigned ScratchOffsetReg, 492 int64_t InstOffset, 493 MachineMemOperand *MMO, 494 RegScavenger *RS) const { 495 MachineBasicBlock *MBB = MI->getParent(); 496 MachineFunction *MF = MI->getParent()->getParent(); 497 const SISubtarget &ST = 
MF->getSubtarget<SISubtarget>(); 498 const SIInstrInfo *TII = ST.getInstrInfo(); 499 const MachineFrameInfo &MFI = MF->getFrameInfo(); 500 501 const MCInstrDesc &Desc = TII->get(LoadStoreOp); 502 const DebugLoc &DL = MI->getDebugLoc(); 503 bool IsStore = Desc.mayStore(); 504 505 bool RanOutOfSGPRs = false; 506 bool Scavenged = false; 507 unsigned SOffset = ScratchOffsetReg; 508 509 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg); 510 unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32; 511 unsigned Size = NumSubRegs * 4; 512 int64_t Offset = InstOffset + MFI.getObjectOffset(Index); 513 const int64_t OriginalImmOffset = Offset; 514 515 unsigned Align = MFI.getObjectAlignment(Index); 516 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo(); 517 518 if (!isUInt<12>(Offset + Size)) { 519 SOffset = AMDGPU::NoRegister; 520 521 // We don't have access to the register scavenger if this function is called 522 // during PEI::scavengeFrameVirtualRegs(). 523 if (RS) 524 SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass); 525 526 if (SOffset == AMDGPU::NoRegister) { 527 // There are no free SGPRs, and since we are in the process of spilling 528 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true 529 // on SI/CI and on VI it is true until we implement spilling using scalar 530 // stores), we have no way to free up an SGPR. Our solution here is to 531 // add the offset directly to the ScratchOffset register, and then 532 // subtract the offset after the spill to return ScratchOffset to it's 533 // original value. 534 RanOutOfSGPRs = true; 535 SOffset = ScratchOffsetReg; 536 } else { 537 Scavenged = true; 538 } 539 540 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset) 541 .addReg(ScratchOffsetReg) 542 .addImm(Offset); 543 544 Offset = 0; 545 } 546 547 const unsigned EltSize = 4; 548 549 for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) { 550 unsigned SubReg = NumSubRegs == 1 ? 
551 ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i)); 552 553 unsigned SOffsetRegState = 0; 554 unsigned SrcDstRegState = getDefRegState(!IsStore); 555 if (i + 1 == e) { 556 SOffsetRegState |= getKillRegState(Scavenged); 557 // The last implicit use carries the "Kill" flag. 558 SrcDstRegState |= getKillRegState(IsKill); 559 } 560 561 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i); 562 MachineMemOperand *NewMMO 563 = MF->getMachineMemOperand(PInfo, MMO->getFlags(), 564 EltSize, MinAlign(Align, EltSize * i)); 565 566 auto MIB = BuildMI(*MBB, MI, DL, Desc) 567 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill)) 568 .addReg(ScratchRsrcReg) 569 .addReg(SOffset, SOffsetRegState) 570 .addImm(Offset) 571 .addImm(0) // glc 572 .addImm(0) // slc 573 .addImm(0) // tfe 574 .addMemOperand(NewMMO); 575 576 if (NumSubRegs > 1) 577 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); 578 } 579 580 if (RanOutOfSGPRs) { 581 // Subtract the offset we added to the ScratchOffset register. 582 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg) 583 .addReg(ScratchOffsetReg) 584 .addImm(OriginalImmOffset); 585 } 586 } 587 588 static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize, 589 bool Store) { 590 if (SuperRegSize % 16 == 0) { 591 return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR : 592 AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR }; 593 } 594 595 if (SuperRegSize % 8 == 0) { 596 return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR : 597 AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR }; 598 } 599 600 return { 4, Store ? 
AMDGPU::S_BUFFER_STORE_DWORD_SGPR : 601 AMDGPU::S_BUFFER_LOAD_DWORD_SGPR}; 602 } 603 604 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, 605 int Index, 606 RegScavenger *RS, 607 bool OnlyToVGPR) const { 608 MachineBasicBlock *MBB = MI->getParent(); 609 MachineFunction *MF = MBB->getParent(); 610 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 611 612 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills 613 = MFI->getSGPRToVGPRSpills(Index); 614 bool SpillToVGPR = !VGPRSpills.empty(); 615 if (OnlyToVGPR && !SpillToVGPR) 616 return false; 617 618 MachineRegisterInfo &MRI = MF->getRegInfo(); 619 const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); 620 const SIInstrInfo *TII = ST.getInstrInfo(); 621 622 unsigned SuperReg = MI->getOperand(0).getReg(); 623 bool IsKill = MI->getOperand(0).isKill(); 624 const DebugLoc &DL = MI->getDebugLoc(); 625 626 MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 627 628 bool SpillToSMEM = spillSGPRToSMEM(); 629 if (SpillToSMEM && OnlyToVGPR) 630 return false; 631 632 assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); 633 634 unsigned OffsetReg = AMDGPU::M0; 635 unsigned M0CopyReg = AMDGPU::NoRegister; 636 637 if (SpillToSMEM) { 638 if (RS->isRegUsed(AMDGPU::M0)) { 639 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 640 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) 641 .addReg(AMDGPU::M0); 642 } 643 } 644 645 unsigned ScalarStoreOp; 646 unsigned EltSize = 4; 647 const TargetRegisterClass *RC = getPhysRegClass(SuperReg); 648 if (SpillToSMEM && isSGPRClass(RC)) { 649 // XXX - if private_element_size is larger than 4 it might be useful to be 650 // able to spill wider vmem spills. 651 std::tie(EltSize, ScalarStoreOp) = 652 getSpillEltSize(getRegSizeInBits(*RC) / 8, true); 653 } 654 655 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); 656 unsigned NumSubRegs = SplitParts.empty() ? 
1 : SplitParts.size(); 657 658 // SubReg carries the "Kill" flag when SubReg == SuperReg. 659 unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); 660 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { 661 unsigned SubReg = NumSubRegs == 1 ? 662 SuperReg : getSubReg(SuperReg, SplitParts[i]); 663 664 if (SpillToSMEM) { 665 int64_t FrOffset = FrameInfo.getObjectOffset(Index); 666 667 // The allocated memory size is really the wavefront size * the frame 668 // index size. The widest register class is 64 bytes, so a 4-byte scratch 669 // allocation is enough to spill this in a single stack object. 670 // 671 // FIXME: Frame size/offsets are computed earlier than this, so the extra 672 // space is still unnecessarily allocated. 673 674 unsigned Align = FrameInfo.getObjectAlignment(Index); 675 MachinePointerInfo PtrInfo 676 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); 677 MachineMemOperand *MMO 678 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 679 EltSize, MinAlign(Align, EltSize * i)); 680 681 // SMEM instructions only support a single offset, so increment the wave 682 // offset. 
683 684 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); 685 if (Offset != 0) { 686 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) 687 .addReg(MFI->getFrameOffsetReg()) 688 .addImm(Offset); 689 } else { 690 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) 691 .addReg(MFI->getFrameOffsetReg()); 692 } 693 694 BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) 695 .addReg(SubReg, getKillRegState(IsKill)) // sdata 696 .addReg(MFI->getScratchRSrcReg()) // sbase 697 .addReg(OffsetReg, RegState::Kill) // soff 698 .addImm(0) // glc 699 .addMemOperand(MMO); 700 701 continue; 702 } 703 704 if (SpillToVGPR) { 705 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; 706 707 BuildMI(*MBB, MI, DL, 708 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), 709 Spill.VGPR) 710 .addReg(SubReg, getKillRegState(IsKill)) 711 .addImm(Spill.Lane); 712 713 // FIXME: Since this spills to another register instead of an actual 714 // frame index, we should delete the frame index when all references to 715 // it are fixed. 716 } else { 717 // XXX - Can to VGPR spill fail for some subregisters but not others? 718 if (OnlyToVGPR) 719 return false; 720 721 // Spill SGPR to a frame index. 722 // TODO: Should VI try to spill to VGPR and then spill to SMEM? 723 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 724 // TODO: Should VI try to spill to VGPR and then spill to SMEM? 725 726 MachineInstrBuilder Mov 727 = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) 728 .addReg(SubReg, SubKillState); 729 730 731 // There could be undef components of a spilled super register. 732 // TODO: Can we detect this and skip the spill? 733 if (NumSubRegs > 1) { 734 // The last implicit use of the SuperReg carries the "Kill" flag. 
735 unsigned SuperKillState = 0; 736 if (i + 1 == e) 737 SuperKillState |= getKillRegState(IsKill); 738 Mov.addReg(SuperReg, RegState::Implicit | SuperKillState); 739 } 740 741 unsigned Align = FrameInfo.getObjectAlignment(Index); 742 MachinePointerInfo PtrInfo 743 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); 744 MachineMemOperand *MMO 745 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 746 EltSize, MinAlign(Align, EltSize * i)); 747 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) 748 .addReg(TmpReg, RegState::Kill) // src 749 .addFrameIndex(Index) // vaddr 750 .addReg(MFI->getScratchRSrcReg()) // srrsrc 751 .addReg(MFI->getFrameOffsetReg()) // soffset 752 .addImm(i * 4) // offset 753 .addMemOperand(MMO); 754 } 755 } 756 757 if (M0CopyReg != AMDGPU::NoRegister) { 758 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0) 759 .addReg(M0CopyReg, RegState::Kill); 760 } 761 762 MI->eraseFromParent(); 763 MFI->addToSpilledSGPRs(NumSubRegs); 764 return true; 765 } 766 767 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, 768 int Index, 769 RegScavenger *RS, 770 bool OnlyToVGPR) const { 771 MachineFunction *MF = MI->getParent()->getParent(); 772 MachineRegisterInfo &MRI = MF->getRegInfo(); 773 MachineBasicBlock *MBB = MI->getParent(); 774 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 775 776 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills 777 = MFI->getSGPRToVGPRSpills(Index); 778 bool SpillToVGPR = !VGPRSpills.empty(); 779 if (OnlyToVGPR && !SpillToVGPR) 780 return false; 781 782 MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 783 const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); 784 const SIInstrInfo *TII = ST.getInstrInfo(); 785 const DebugLoc &DL = MI->getDebugLoc(); 786 787 unsigned SuperReg = MI->getOperand(0).getReg(); 788 bool SpillToSMEM = spillSGPRToSMEM(); 789 if (SpillToSMEM && OnlyToVGPR) 790 return false; 791 792 assert(SuperReg != AMDGPU::M0 && "m0 should 
never spill"); 793 794 unsigned OffsetReg = AMDGPU::M0; 795 unsigned M0CopyReg = AMDGPU::NoRegister; 796 797 if (SpillToSMEM) { 798 if (RS->isRegUsed(AMDGPU::M0)) { 799 M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 800 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg) 801 .addReg(AMDGPU::M0); 802 } 803 } 804 805 unsigned EltSize = 4; 806 unsigned ScalarLoadOp; 807 808 const TargetRegisterClass *RC = getPhysRegClass(SuperReg); 809 if (SpillToSMEM && isSGPRClass(RC)) { 810 // XXX - if private_element_size is larger than 4 it might be useful to be 811 // able to spill wider vmem spills. 812 std::tie(EltSize, ScalarLoadOp) = 813 getSpillEltSize(getRegSizeInBits(*RC) / 8, false); 814 } 815 816 ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); 817 unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); 818 819 // SubReg carries the "Kill" flag when SubReg == SuperReg. 820 int64_t FrOffset = FrameInfo.getObjectOffset(Index); 821 822 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { 823 unsigned SubReg = NumSubRegs == 1 ? 824 SuperReg : getSubReg(SuperReg, SplitParts[i]); 825 826 if (SpillToSMEM) { 827 // FIXME: Size may be > 4 but extra bytes wasted. 
828 unsigned Align = FrameInfo.getObjectAlignment(Index); 829 MachinePointerInfo PtrInfo 830 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); 831 MachineMemOperand *MMO 832 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, 833 EltSize, MinAlign(Align, EltSize * i)); 834 835 // Add i * 4 offset 836 int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); 837 if (Offset != 0) { 838 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) 839 .addReg(MFI->getFrameOffsetReg()) 840 .addImm(Offset); 841 } else { 842 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) 843 .addReg(MFI->getFrameOffsetReg()); 844 } 845 846 auto MIB = 847 BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg) 848 .addReg(MFI->getScratchRSrcReg()) // sbase 849 .addReg(OffsetReg, RegState::Kill) // soff 850 .addImm(0) // glc 851 .addMemOperand(MMO); 852 853 if (NumSubRegs > 1) 854 MIB.addReg(SuperReg, RegState::ImplicitDefine); 855 856 continue; 857 } 858 859 if (SpillToVGPR) { 860 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; 861 auto MIB = 862 BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), 863 SubReg) 864 .addReg(Spill.VGPR) 865 .addImm(Spill.Lane); 866 867 if (NumSubRegs > 1) 868 MIB.addReg(SuperReg, RegState::ImplicitDefine); 869 } else { 870 if (OnlyToVGPR) 871 return false; 872 873 // Restore SGPR from a stack slot. 874 // FIXME: We should use S_LOAD_DWORD here for VI. 
875 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 876 unsigned Align = FrameInfo.getObjectAlignment(Index); 877 878 MachinePointerInfo PtrInfo 879 = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i); 880 881 MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, 882 MachineMemOperand::MOLoad, EltSize, 883 MinAlign(Align, EltSize * i)); 884 885 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) 886 .addFrameIndex(Index) // vaddr 887 .addReg(MFI->getScratchRSrcReg()) // srsrc 888 .addReg(MFI->getFrameOffsetReg()) // soffset 889 .addImm(i * 4) // offset 890 .addMemOperand(MMO); 891 892 auto MIB = 893 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg) 894 .addReg(TmpReg, RegState::Kill); 895 896 if (NumSubRegs > 1) 897 MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); 898 } 899 } 900 901 if (M0CopyReg != AMDGPU::NoRegister) { 902 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0) 903 .addReg(M0CopyReg, RegState::Kill); 904 } 905 906 MI->eraseFromParent(); 907 return true; 908 } 909 910 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to 911 /// a VGPR and the stack slot can be safely eliminated when all other users are 912 /// handled. 
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
  MachineBasicBlock::iterator MI,
  int FI,
  RegScavenger *RS) const {
  // Dispatch purely on the pseudo opcode: every SGPR save pseudo goes to
  // spillSGPR and every SGPR restore pseudo goes to restoreSGPR. The result of
  // the helper is returned unchanged to the caller.
  //
  // NOTE(review): the trailing `true` argument presumably restricts the helper
  // to the "spill to VGPR lanes only" path - confirm against the declarations
  // of spillSGPR/restoreSGPR.
  switch (MI->getOpcode()) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, true);
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, true);
  default:
    // Callers must only pass SGPR spill/restore pseudos.
    llvm_unreachable("not an SGPR spill instruction");
  }
}

/// Rewrite the frame index operand \p FIOperandNum of \p MI.
///
/// Spill/restore pseudos are expanded into real instructions (SGPR pseudos via
/// spillSGPR/restoreSGPR, VGPR pseudos via buildSpillLoadStore). For any other
/// instruction the frame index operand is replaced by either a computed
/// register, a folded MUBUF immediate offset, or the object's frame offset as
/// an immediate / materialized VGPR.
///
/// \param MI           Instruction holding the frame index operand.
/// \param SPAdj        Not referenced by this implementation.
/// \param FIOperandNum Index of the frame index operand within \p MI.
/// \param RS           Register scavenger, forwarded to the spill helpers.
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  // FIOp is kept as a reference because the default case mutates the operand
  // in place; Index is the frame index read from that same operand.
  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      // The helper's boolean result is intentionally ignored here.
      spillSGPR(MI, Index, RS);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      // The helper's boolean result is intentionally ignored here.
      restoreSGPR(MI, Index, RS);
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE: {
      // Expand the pseudo into buffer stores, reusing the pseudo's own
      // vdata/srsrc/soffset/offset operands and its first memory operand.
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      // Account for the spilled sub-registers in the function info, then drop
      // the now-expanded pseudo.
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    }
    // VGPR register restore
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      // Mirror of the save path using buffer loads; no spill accounting is
      // done for restores.
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);

      buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            Index,
            VData->getReg(), VData->isKill(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
            *MI->memoperands_begin(),
            RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      // Any other instruction that references a frame index.
      // Note: this DL shadows the function-level DL; both are taken from MI.
      const DebugLoc &DL = MI->getDebugLoc();
      bool IsMUBUF = TII->isMUBUF(*MI);

      if (!IsMUBUF &&
          MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) {
        // Convert to an absolute stack address by finding the offset from the
        // scratch wave base and scaling by the wave size.
        //
        // In an entry function/kernel the stack address is already the absolute
        // address relative to the scratch wave offset.

        unsigned DiffReg
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

        // When MI is itself a V_MOV_B32, write the result straight into its
        // destination and delete MI below instead of feeding it a new vreg.
        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
        unsigned ResultReg = IsCopy ?
          MI->getOperand(0).getReg() :
          MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

        // DiffReg = FrameOffsetReg - ScratchWaveOffsetReg
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg)
          .addReg(MFI->getFrameOffsetReg())
          .addReg(MFI->getScratchWaveOffsetReg());

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        if (Offset == 0) {
          // XXX - This never happens because of emergency scavenging slot at 0?
          // ResultReg = DiffReg >> log2(wavefront size)
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
            .addImm(Log2_32(ST.getWavefrontSize()))
            .addReg(DiffReg);
        } else {
          unsigned CarryOut
            = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
          unsigned ScaledReg
            = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

          // ScaledReg = DiffReg >> log2(wavefront size)
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
            .addImm(Log2_32(ST.getWavefrontSize()))
            .addReg(DiffReg, RegState::Kill);

          // ResultReg = Offset + ScaledReg. The offset is used directly when
          // it is an inline constant, otherwise it is first moved into an SGPR.
          // The carry-out of the add is defined but marked dead.
          // TODO: Fold if use instruction is another add of a constant.
          if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
              .addReg(CarryOut, RegState::Define | RegState::Dead)
              .addImm(Offset)
              .addReg(ScaledReg, RegState::Kill);
          } else {
            unsigned ConstOffsetReg
              = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
              .addImm(Offset);
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg)
              .addReg(CarryOut, RegState::Define | RegState::Dead)
              .addReg(ConstOffsetReg, RegState::Kill)
              .addReg(ScaledReg, RegState::Kill);
          }

          // Hint the allocator to place the (dead) carry-out in VCC.
          MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC);
        }

        // Don't introduce an extra copy if we're just materializing in a mov.
        if (IsCopy)
          MI->eraseFromParent();
        else
          // Arguments after the register: isDef=false, isImp=false, isKill=true.
          FIOp.ChangeToRegister(ResultReg, false, false, true);
        return;
      }

      if (IsMUBUF) {
        // Disable offen so we don't need a 0 vgpr base.
        assert(static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::vaddr));

        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg()
               == MFI->getFrameOffsetReg());

        // Try to fold the object's frame offset into the instruction's
        // existing immediate offset.
        int64_t Offset = FrameInfo.getObjectOffset(Index);
        int64_t OldImm
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        // Only attempted when the combined offset fits in 12 unsigned bits;
        // on success the helper has emitted a replacement, so MI is erased.
        if (isUInt<12>(NewOffset) &&
            buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
          MI->eraseFromParent();
          return;
        }
      }

      // If the offset is simply too big, don't convert to a scratch wave offset
      // relative index.

      // Fallback: substitute the raw object offset as an immediate, and if the
      // instruction cannot take that immediate, materialize it in a VGPR.
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

/// Return the assembler name of physical register \p Reg.
///
/// VGPR/SGPR registers and tuples are looked up in the name tables pulled in
/// from AMDGPURegAsmNames.inc.cpp; the flat scratch registers are special
/// cased; everything else defers to TargetRegisterInfo::getRegAsmName.
StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const {
  // Defining AMDGPU_REG_ASM_NAMES before the include selects the name tables
  // (VGPR32RegNames, SGPR32RegNames, ...) referenced below.
  #define AMDGPU_REG_ASM_NAMES
  #include "AMDGPURegAsmNames.inc.cpp"

  // If Reg lies within [BeginReg, EndReg], return the table entry at its
  // distance from BeginReg. The ranges are tested in the order listed below.
  #define REG_RANGE(BeginReg, EndReg, RegTable)            \
    if (Reg >= BeginReg && Reg <= EndReg) {                \
      unsigned Index = Reg - BeginReg;                     \
      assert(Index < array_lengthof(RegTable));            \
      return RegTable[Index];                              \
    }

  REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames);
  REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames);
  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255,
            VGPR96RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3,
            AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR128RegNames);
  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3,
            AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR128RegNames);

  REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7,
            AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
            VGPR256RegNames);

  REG_RANGE(
    AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15,
    AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255,
    VGPR512RegNames);

  REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7,
            AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
            SGPR256RegNames);

  REG_RANGE(
    AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15,
    AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103,
    SGPR512RegNames
  );

#undef REG_RANGE

  // FIXME: Rename flat_scr so we don't need to special case this.
  switch (Reg) {
  case AMDGPU::FLAT_SCR:
    return "flat_scratch";
  case AMDGPU::FLAT_SCR_LO:
    return "flat_scratch_lo";
  case AMDGPU::FLAT_SCR_HI:
    return "flat_scratch_hi";
  default:
    // For the special named registers the default is fine.
    return TargetRegisterInfo::getRegAsmName(Reg);
  }
}

/// Return the first class in a fixed list of base register classes that
/// contains the physical register \p Reg, or nullptr if none of them does.
///
/// The list is scanned linearly in declaration order, so the result for a
/// register contained in several listed classes is the earliest one.
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
1208 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { 1209 unsigned Size = getRegSizeInBits(*RC); 1210 if (Size < 32) 1211 return false; 1212 switch (Size) { 1213 case 32: 1214 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; 1215 case 64: 1216 return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; 1217 case 96: 1218 return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; 1219 case 128: 1220 return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; 1221 case 256: 1222 return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; 1223 case 512: 1224 return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; 1225 default: 1226 llvm_unreachable("Invalid register class size"); 1227 } 1228 } 1229 1230 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( 1231 const TargetRegisterClass *SRC) const { 1232 switch (getRegSizeInBits(*SRC)) { 1233 case 32: 1234 return &AMDGPU::VGPR_32RegClass; 1235 case 64: 1236 return &AMDGPU::VReg_64RegClass; 1237 case 96: 1238 return &AMDGPU::VReg_96RegClass; 1239 case 128: 1240 return &AMDGPU::VReg_128RegClass; 1241 case 256: 1242 return &AMDGPU::VReg_256RegClass; 1243 case 512: 1244 return &AMDGPU::VReg_512RegClass; 1245 default: 1246 llvm_unreachable("Invalid register class size"); 1247 } 1248 } 1249 1250 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( 1251 const TargetRegisterClass *VRC) const { 1252 switch (getRegSizeInBits(*VRC)) { 1253 case 32: 1254 return &AMDGPU::SGPR_32RegClass; 1255 case 64: 1256 return &AMDGPU::SReg_64RegClass; 1257 case 128: 1258 return &AMDGPU::SReg_128RegClass; 1259 case 256: 1260 return &AMDGPU::SReg_256RegClass; 1261 case 512: 1262 return &AMDGPU::SReg_512RegClass; 1263 default: 1264 llvm_unreachable("Invalid register class size"); 1265 } 1266 } 1267 1268 const TargetRegisterClass *SIRegisterInfo::getSubRegClass( 1269 const TargetRegisterClass *RC, unsigned SubIdx) const { 1270 if (SubIdx 
== AMDGPU::NoSubRegister) 1271 return RC; 1272 1273 // We can assume that each lane corresponds to one 32-bit register. 1274 LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger(); 1275 unsigned Count = countPopulation(Mask); 1276 if (isSGPRClass(RC)) { 1277 switch (Count) { 1278 case 1: 1279 return &AMDGPU::SGPR_32RegClass; 1280 case 2: 1281 return &AMDGPU::SReg_64RegClass; 1282 case 4: 1283 return &AMDGPU::SReg_128RegClass; 1284 case 8: 1285 return &AMDGPU::SReg_256RegClass; 1286 case 16: /* fall-through */ 1287 default: 1288 llvm_unreachable("Invalid sub-register class size"); 1289 } 1290 } else { 1291 switch (Count) { 1292 case 1: 1293 return &AMDGPU::VGPR_32RegClass; 1294 case 2: 1295 return &AMDGPU::VReg_64RegClass; 1296 case 3: 1297 return &AMDGPU::VReg_96RegClass; 1298 case 4: 1299 return &AMDGPU::VReg_128RegClass; 1300 case 8: 1301 return &AMDGPU::VReg_256RegClass; 1302 case 16: /* fall-through */ 1303 default: 1304 llvm_unreachable("Invalid sub-register class size"); 1305 } 1306 } 1307 } 1308 1309 bool SIRegisterInfo::shouldRewriteCopySrc( 1310 const TargetRegisterClass *DefRC, 1311 unsigned DefSubReg, 1312 const TargetRegisterClass *SrcRC, 1313 unsigned SrcSubReg) const { 1314 // We want to prefer the smallest register class possible, so we don't want to 1315 // stop and rewrite on anything that looks like a subregister 1316 // extract. Operations mostly don't care about the super register class, so we 1317 // only want to stop on the most basic of copies between the same register 1318 // class. 1319 // 1320 // e.g. if we have something like 1321 // vreg0 = ... 1322 // vreg1 = ... 1323 // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2 1324 // vreg3 = COPY vreg2, sub0 1325 // 1326 // We want to look through the COPY to find: 1327 // => vreg3 = COPY vreg0 1328 1329 // Plain copy. 
1330 return getCommonSubClass(DefRC, SrcRC) != nullptr; 1331 } 1332 1333 // FIXME: Most of these are flexible with HSA and we don't need to reserve them 1334 // as input registers if unused. Whether the dispatch ptr is necessary should be 1335 // easy to detect from used intrinsics. Scratch setup is harder to know. 1336 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, 1337 enum PreloadedValue Value) const { 1338 1339 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1340 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 1341 (void)ST; 1342 switch (Value) { 1343 case SIRegisterInfo::WORKGROUP_ID_X: 1344 assert(MFI->hasWorkGroupIDX()); 1345 return MFI->WorkGroupIDXSystemSGPR; 1346 case SIRegisterInfo::WORKGROUP_ID_Y: 1347 assert(MFI->hasWorkGroupIDY()); 1348 return MFI->WorkGroupIDYSystemSGPR; 1349 case SIRegisterInfo::WORKGROUP_ID_Z: 1350 assert(MFI->hasWorkGroupIDZ()); 1351 return MFI->WorkGroupIDZSystemSGPR; 1352 case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: 1353 return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; 1354 case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: 1355 assert(MFI->hasPrivateSegmentBuffer()); 1356 return MFI->PrivateSegmentBufferUserSGPR; 1357 case SIRegisterInfo::IMPLICIT_BUFFER_PTR: 1358 assert(MFI->hasImplicitBufferPtr()); 1359 return MFI->ImplicitBufferPtrUserSGPR; 1360 case SIRegisterInfo::KERNARG_SEGMENT_PTR: 1361 assert(MFI->hasKernargSegmentPtr()); 1362 return MFI->KernargSegmentPtrUserSGPR; 1363 case SIRegisterInfo::DISPATCH_ID: 1364 assert(MFI->hasDispatchID()); 1365 return MFI->DispatchIDUserSGPR; 1366 case SIRegisterInfo::FLAT_SCRATCH_INIT: 1367 assert(MFI->hasFlatScratchInit()); 1368 return MFI->FlatScratchInitUserSGPR; 1369 case SIRegisterInfo::DISPATCH_PTR: 1370 assert(MFI->hasDispatchPtr()); 1371 return MFI->DispatchPtrUserSGPR; 1372 case SIRegisterInfo::QUEUE_PTR: 1373 assert(MFI->hasQueuePtr()); 1374 return MFI->QueuePtrUserSGPR; 1375 case SIRegisterInfo::WORKITEM_ID_X: 1376 
assert(MFI->hasWorkItemIDX()); 1377 return AMDGPU::VGPR0; 1378 case SIRegisterInfo::WORKITEM_ID_Y: 1379 assert(MFI->hasWorkItemIDY()); 1380 return AMDGPU::VGPR1; 1381 case SIRegisterInfo::WORKITEM_ID_Z: 1382 assert(MFI->hasWorkItemIDZ()); 1383 return AMDGPU::VGPR2; 1384 } 1385 llvm_unreachable("unexpected preloaded value type"); 1386 } 1387 1388 /// \brief Returns a register that is not used at any point in the function. 1389 /// If all registers are used, then this function will return 1390 // AMDGPU::NoRegister. 1391 unsigned 1392 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, 1393 const TargetRegisterClass *RC, 1394 const MachineFunction &MF) const { 1395 1396 for (unsigned Reg : *RC) 1397 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) 1398 return Reg; 1399 return AMDGPU::NoRegister; 1400 } 1401 1402 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, 1403 unsigned EltSize) const { 1404 if (EltSize == 4) { 1405 static const int16_t Sub0_15[] = { 1406 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 1407 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 1408 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 1409 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 1410 }; 1411 1412 static const int16_t Sub0_7[] = { 1413 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 1414 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 1415 }; 1416 1417 static const int16_t Sub0_3[] = { 1418 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 1419 }; 1420 1421 static const int16_t Sub0_2[] = { 1422 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 1423 }; 1424 1425 static const int16_t Sub0_1[] = { 1426 AMDGPU::sub0, AMDGPU::sub1, 1427 }; 1428 1429 switch (AMDGPU::getRegBitWidth(*RC->MC)) { 1430 case 32: 1431 return {}; 1432 case 64: 1433 return makeArrayRef(Sub0_1); 1434 case 96: 1435 return makeArrayRef(Sub0_2); 1436 case 128: 1437 return makeArrayRef(Sub0_3); 1438 case 256: 1439 
return makeArrayRef(Sub0_7); 1440 case 512: 1441 return makeArrayRef(Sub0_15); 1442 default: 1443 llvm_unreachable("unhandled register size"); 1444 } 1445 } 1446 1447 if (EltSize == 8) { 1448 static const int16_t Sub0_15_64[] = { 1449 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 1450 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 1451 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 1452 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15 1453 }; 1454 1455 static const int16_t Sub0_7_64[] = { 1456 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 1457 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7 1458 }; 1459 1460 1461 static const int16_t Sub0_3_64[] = { 1462 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3 1463 }; 1464 1465 switch (AMDGPU::getRegBitWidth(*RC->MC)) { 1466 case 64: 1467 return {}; 1468 case 128: 1469 return makeArrayRef(Sub0_3_64); 1470 case 256: 1471 return makeArrayRef(Sub0_7_64); 1472 case 512: 1473 return makeArrayRef(Sub0_15_64); 1474 default: 1475 llvm_unreachable("unhandled register size"); 1476 } 1477 } 1478 1479 assert(EltSize == 16 && "unhandled register spill split size"); 1480 1481 static const int16_t Sub0_15_128[] = { 1482 AMDGPU::sub0_sub1_sub2_sub3, 1483 AMDGPU::sub4_sub5_sub6_sub7, 1484 AMDGPU::sub8_sub9_sub10_sub11, 1485 AMDGPU::sub12_sub13_sub14_sub15 1486 }; 1487 1488 static const int16_t Sub0_7_128[] = { 1489 AMDGPU::sub0_sub1_sub2_sub3, 1490 AMDGPU::sub4_sub5_sub6_sub7 1491 }; 1492 1493 switch (AMDGPU::getRegBitWidth(*RC->MC)) { 1494 case 128: 1495 return {}; 1496 case 256: 1497 return makeArrayRef(Sub0_7_128); 1498 case 512: 1499 return makeArrayRef(Sub0_15_128); 1500 default: 1501 llvm_unreachable("unhandled register size"); 1502 } 1503 } 1504 1505 const TargetRegisterClass* 1506 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI, 1507 unsigned Reg) const { 1508 if (TargetRegisterInfo::isVirtualRegister(Reg)) 1509 return MRI.getRegClass(Reg); 1510 1511 return getPhysRegClass(Reg); 1512 } 1513 1514 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, 1515 unsigned Reg) 
const { 1516 return hasVGPRs(getRegClassForReg(MRI, Reg)); 1517 } 1518 1519 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, 1520 const TargetRegisterClass *SrcRC, 1521 unsigned SubReg, 1522 const TargetRegisterClass *DstRC, 1523 unsigned DstSubReg, 1524 const TargetRegisterClass *NewRC) const { 1525 unsigned SrcSize = getRegSizeInBits(*SrcRC); 1526 unsigned DstSize = getRegSizeInBits(*DstRC); 1527 unsigned NewSize = getRegSizeInBits(*NewRC); 1528 1529 // Do not increase size of registers beyond dword, we would need to allocate 1530 // adjacent registers and constraint regalloc more than needed. 1531 1532 // Always allow dword coalescing. 1533 if (SrcSize <= 32 || DstSize <= 32) 1534 return true; 1535 1536 return NewSize <= DstSize || NewSize <= SrcSize; 1537 } 1538 1539 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, 1540 MachineFunction &MF) const { 1541 1542 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 1543 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 1544 1545 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), 1546 *MF.getFunction()); 1547 switch (RC->getID()) { 1548 default: 1549 return AMDGPURegisterInfo::getRegPressureLimit(RC, MF); 1550 case AMDGPU::VGPR_32RegClassID: 1551 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); 1552 case AMDGPU::SGPR_32RegClassID: 1553 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); 1554 } 1555 } 1556 1557 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, 1558 unsigned Idx) const { 1559 if (Idx == getVGPRPressureSet()) 1560 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, 1561 const_cast<MachineFunction &>(MF)); 1562 1563 if (Idx == getSGPRPressureSet()) 1564 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, 1565 const_cast<MachineFunction &>(MF)); 1566 1567 return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx); 1568 } 1569 1570 const int 
*SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { 1571 static const int Empty[] = { -1 }; 1572 1573 if (hasRegUnit(AMDGPU::M0, RegUnit)) 1574 return Empty; 1575 return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit); 1576 } 1577