//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//

#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

/// Returns true if \p PSetID occurs in \p PSets, a list of pressure set IDs
/// that is terminated by -1.
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
  for (unsigned i = 0; PSets[i] != -1; ++i) {
    if (PSets[i] == (int)PSetID)
      return true;
  }
  return false;
}

/// Sets bit \p PSetID in \p PressureSets if at least one register unit of
/// \p Reg belongs to pressure set \p PSetID. \p PressureSets is left untouched
/// otherwise.
void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
                                         BitVector &PressureSets) const {
  for (MCRegUnitIterator U(Reg, this); U.isValid(); ++U) {
    const int *PSets = getRegUnitPressureSets(*U);
    if (hasPressureSet(PSets, PSetID)) {
      PressureSets.set(PSetID);
      // One matching unit is enough; the bit is already set.
      break;
    }
  }
}

/// Classifies every register pressure set as covering SGPRs and/or VGPRs
/// (using SGPR0 and VGPR0 as the representative registers) and then picks, for
/// each kind, the pressure set that contains the most register units as
/// SGPRSetID / VGPRSetID.
SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
                                   SGPRPressureSets(getNumRegPressureSets()),
                                   VGPRPressureSets(getNumRegPressureSets()) {
  unsigned NumRegPressureSets = getNumRegPressureSets();

  // Start with an out-of-range ID so the assert below can tell whether a set
  // was actually found.
  SGPRSetID = NumRegPressureSets;
  VGPRSetID = NumRegPressureSets;

  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
    classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
  }

  // Determine the number of reg units for each pressure set.
  std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
  for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
    const int *PSets = getRegUnitPressureSets(i);
    for (unsigned j = 0; PSets[j] != -1; ++j) {
      ++PressureSetRegUnits[PSets[j]];
    }
  }

  // Pick the largest set of each kind. A set that becomes the new VGPR maximum
  // is not also considered as an SGPR candidate (note the `continue`).
  // NOTE(review): isVGPRPressureSet/isSGPRPressureSet are declared elsewhere;
  // presumably they test the bit vectors filled in above -- confirm.
  unsigned VGPRMax = 0, SGPRMax = 0;
  for (unsigned i = 0; i < NumRegPressureSets; ++i) {
    if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
      VGPRSetID = i;
      VGPRMax = PressureSetRegUnits[i];
      continue;
    }
    if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
      SGPRSetID = i;
      SGPRMax = PressureSetRegUnits[i];
    }
  }

  assert(SGPRSetID < NumRegPressureSets &&
         VGPRSetID < NumRegPressureSets);
}

/// Marks every register produced by an alias iterator over \p Reg as reserved
/// in \p Reserved.
// NOTE(review): the `true` passed to MCRegAliasIterator is presumably the
// "include self" flag, so that Reg itself is reserved as well -- confirm.
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
  MCRegAliasIterator R(Reg, this, true);

  for (; R.isValid(); ++R)
    Reserved.set(*R);
}

/// Returns the SReg_128 register whose sub0 is the SGPR at index
/// alignDown(getMaxNumSGPRs(MF), 4) - 4, i.e. the last 4-aligned group of four
/// SGPRs below the function's SGPR limit.
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
  const MachineFunction &MF) const {
  unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
  unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}

/// Returns the 32-bit SGPR chosen for the private segment wave byte offset:
/// the last SGPR below the limit when the limit is not a multiple of four,
/// otherwise the SGPR just below the group of four used by
/// reservedPrivateSegmentBufferReg().
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
  const MachineFunction &MF) const {
  unsigned RegCount = getMaxNumSGPRs(MF);
  unsigned Reg;

  // Try to place it in a hole after the private segment buffer register.
  if (RegCount & 3) {
    // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to
    // alignment constraints, so we have a hole where we can put the wave
    // offset.
    Reg = RegCount - 1;
  } else {
    // We can put the segment buffer in (Idx - 4) ... (Idx - 1) and put the
    // wave offset before it.
    Reg = RegCount - 5;
  }
  return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}

/// Builds the set of registers the allocator must not use for \p MF:
/// INDIRECT_BASE_ADDR, EXEC, FLAT_SCR, the trap handler registers, every SGPR
/// and VGPR at or beyond the function's per-kind limit, and the scratch wave
/// offset / scratch resource descriptor registers when they are assigned.
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);

  // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
  // this seems likely to result in bugs, so I'm marking them as reserved.
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

  // Reserve Trap Handler registers - support is not implemented in Codegen.
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);

  // Reserve every SGPR whose index is at or above the function's SGPR limit.
  unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
    unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  // Likewise for VGPRs.
  unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
  unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
  for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
    unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
    reserveRegisterTuples(Reserved, Reg);
  }

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
  if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
    // Reserve 1 SGPR for scratch wave offset in case we need to spill.
    reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
  }

  unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
    // to spill.
    // TODO: May need to reserve a VGPR if doing LDS spilling.
    reserveRegisterTuples(Reserved, ScratchRSrcReg);
    assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
  }

  return Reserved;
}

/// Register scavenging is requested exactly when the function has stack
/// objects.
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  return Fn.getFrameInfo().hasStackObjects();
}

/// Frame index scavenging is requested exactly when the function has stack
/// objects.
bool
SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects();
}

/// Always returns true.
bool SIRegisterInfo::requiresVirtualBaseRegisters(
  const MachineFunction &) const {
  // There are no special dedicated stack or frame pointers.
  return true;
}

/// Always returns true.
bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
  // This helps catch bugs as verifier errors.
  return true;
}

/// Returns the immediate `offset` operand of \p MI when it is a MUBUF
/// instruction, and 0 for every other instruction. In asserts builds, \p Idx
/// is checked to be the `vaddr` operand index.
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI))
    return 0;

  assert(Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::vaddr) &&
         "Should never see frame index on non-address operand");

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
}

/// Returns true for any instruction that may load or store; \p Offset is not
/// consulted.
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  return MI->mayLoadOrStore();
}

/// Inserts, at the start of \p MBB, code that defines \p BaseReg as the
/// address of frame index \p FrameIdx plus \p Offset.
///
/// With a zero offset this is a single V_MOV_B32 of the frame index. Otherwise
/// the offset is first moved into a new SGPR and then added to the frame index
/// with V_ADD_I32_e64, whose carry output is defined dead.
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                  unsigned BaseReg,
                                                  int FrameIdx,
                                                  int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"

  // Borrow the debug location of the first instruction, if there is one.
  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

  if (Offset == 0) {
    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), BaseReg)
      .addFrameIndex(FrameIdx);
    return;
  }

  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
    .addImm(Offset);
  BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
    .addReg(UnusedCarry, RegState::Define | RegState::Dead)
    .addReg(OffsetReg, RegState::Kill)
    .addFrameIndex(FrameIdx);
}

/// Rewrites the frame index in the `vaddr` operand of the MUBUF instruction
/// \p MI to \p BaseReg and adds \p Offset to its immediate `offset` operand.
/// The resulting offset is asserted to fit in an unsigned 12-bit immediate.
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                       int64_t Offset) const {

  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction *MF = MBB->getParent();
  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = Subtarget.getInstrInfo();

#ifndef NDEBUG
  // Debug-only sanity check: the instruction must reference at most one frame
  // index.
  // FIXME: Is it possible to be storing a frame index to itself?
  bool SeenFI = false;
  for (const MachineOperand &MO: MI.operands()) {
    if (MO.isFI()) {
      if (SeenFI)
        llvm_unreachable("should not see multiple frame indices");

      SeenFI = true;
    }
  }
#endif

  MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
  assert(FIOp && FIOp->isFI() && "frame index must be address operand");

  assert(TII->isMUBUF(MI));

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;
  assert(isUInt<12>(NewOffset) && "offset should be legal");

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
}

/// An offset is legal only on MUBUF instructions and only when it fits in an
/// unsigned 12-bit immediate. \p BaseReg is not consulted.
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        unsigned BaseReg,
                                        int64_t Offset) const {
  return SIInstrInfo::isMUBUF(*MI) && isUInt<12>(Offset);
}

/// Always returns the 32-bit VGPR class, regardless of \p MF and \p Kind.
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
  const MachineFunction &MF, unsigned Kind) const {
  // This is inaccurate. It depends on the instruction and address space. The
  // only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

/// Maps a spill pseudo opcode (SI_SPILL_{S,V}<bits>_{SAVE,RESTORE}) to the
/// number of 32-bit sub-registers it moves, i.e. <bits> / 32. Any other opcode
/// is unreachable.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

/// Expands the VGPR spill pseudo \p MI into one \p LoadStoreOp per 32-bit
/// sub-register of \p SrcDst, inserted before \p MI at consecutive 4-byte
/// offsets starting at \p Offset. \p MI itself is not erased here.
///
/// If \p Offset plus the total size does not fit in an unsigned 12-bit
/// immediate, the offset is folded into an SGPR with S_ADD_U32 first. That
/// SGPR is one found by \p RS when available; otherwise \p ScratchOffset is
/// modified in place and restored with S_SUB_U32 after the last access.
///
/// \param MI             Spill pseudo being expanded; its opcode selects the
///                       sub-register count and mayStore() the direction.
/// \param LoadStoreOp    Opcode emitted for each 32-bit element.
/// \param SrcDst         Register operand being stored or loaded.
/// \param ScratchRsrcReg Register added as the resource operand of each access.
/// \param ScratchOffset  Register used as the scalar offset operand.
/// \param Offset         Byte offset of the first element.
/// \param RS             Register scavenger; may be null.
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
                                           unsigned LoadStoreOp,
                                           const MachineOperand *SrcDst,
                                           unsigned ScratchRsrcReg,
                                           unsigned ScratchOffset,
                                           int64_t Offset,
                                           RegScavenger *RS) const {

  unsigned Value = SrcDst->getReg();
  bool IsKill = SrcDst->isKill();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MI->getParent()->getParent();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  DebugLoc DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  bool RanOutOfSGPRs = false;
  bool Scavenged = false;
  unsigned SOffset = ScratchOffset;
  // Remember the immediate so it can be subtracted again if ScratchOffset has
  // to be modified in place.
  unsigned OriginalImmOffset = Offset;

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned Size = NumSubRegs * 4;

  if (!isUInt<12>(Offset + Size)) {
    SOffset = AMDGPU::NoRegister;

    // We don't have access to the register scavenger if this function is called
    // during PEI::scavengeFrameVirtualRegs().
    if (RS)
      SOffset = RS->FindUnusedReg(&AMDGPU::SGPR_32RegClass);

    if (SOffset == AMDGPU::NoRegister) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset register, and then
      // subtract the offset after the spill to return ScratchOffset to its
      // original value.
      RanOutOfSGPRs = true;
      SOffset = ScratchOffset;
    } else {
      Scavenged = true;
    }
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
            .addReg(ScratchOffset)
            .addImm(Offset);
    // The whole offset now lives in SOffset; the immediates start from zero.
    Offset = 0;
  }

  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
    unsigned SubReg = NumSubRegs == 1 ?
      Value : getSubReg(Value, getSubRegFromChannel(i));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      // Only the last access may kill the scavenged offset register.
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
      .addReg(SubReg, getDefRegState(!IsStore))
      .addReg(ScratchRsrcReg)
      .addReg(SOffset, SOffsetRegState)
      .addImm(Offset)
      .addImm(0) // glc
      .addImm(0) // slc
      .addImm(0) // tfe
      .addReg(Value, RegState::Implicit | SrcDstRegState)
      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  }
  if (RanOutOfSGPRs) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
            .addReg(ScratchOffset)
            .addImm(OriginalImmOffset);
  }
}

/// Expands the SGPR spill pseudo \p MI for frame index \p Index and erases it.
///
/// Each 32-bit sub-register of the spilled register is handled separately.
/// When SIMachineFunctionInfo::getSpilledReg() provides a VGPR lane for it,
/// the value is written there with V_WRITELANE_B32. Otherwise it is copied
/// into a new VGPR and stored through an SI_SPILL_V32_SAVE pseudo at byte
/// offset i * 4. Finally the number of sub-registers is added to the
/// function's spilled-SGPR count. \p RS is not used.
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();
  bool IsKill = MI->getOperand(0).isKill();

  // SubReg carries the "Kill" flag when SubReg == SuperReg.
  unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    struct SIMachineFunctionInfo::SpilledReg Spill =
      MFI->getSpilledReg(MF, Index, i);
    if (Spill.hasReg()) {
      if (SuperReg == AMDGPU::M0) {
        // M0 is first copied into a non-M0 SGPR, which is then spilled in its
        // place.
        assert(NumSubRegs == 1);
        unsigned CopyM0
          = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), CopyM0)
          .addReg(SuperReg, getKillRegState(IsKill));

        // The real spill now kills the temp copy.
        SubReg = SuperReg = CopyM0;
        IsKill = true;
      }

      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
              Spill.VGPR)
        .addReg(SubReg, getKillRegState(IsKill))
        .addImm(Spill.Lane);

      // FIXME: Since this spills to another register instead of an actual
      // frame index, we should delete the frame index when all references to
      // it are fixed.
    } else {
      // Spill SGPR to a frame index.
      // FIXME we should use S_STORE_DWORD here for VI.
      MachineInstrBuilder Mov
        = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
        .addReg(SubReg, SubKillState);


      // There could be undef components of a spilled super register.
      // TODO: Can we detect this and skip the spill?
      if (NumSubRegs > 1) {
        // The last implicit use of the SuperReg carries the "Kill" flag.
        unsigned SuperKillState = 0;
        if (i + 1 == e)
          SuperKillState |= getKillRegState(IsKill);
        Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
      }

      unsigned Size = FrameInfo.getObjectSize(Index);
      unsigned Align = FrameInfo.getObjectAlignment(Index);
      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);
      MachineMemOperand *MMO
        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                   Size, Align);
      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
        .addReg(TmpReg, RegState::Kill)         // src
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
    }
  }

  MI->eraseFromParent();
  MFI->addToSpilledSGPRs(NumSubRegs);
}

/// Expands the SGPR restore pseudo \p MI for frame index \p Index and erases
/// it.
///
/// Each 32-bit sub-register is reloaded separately. When
/// SIMachineFunctionInfo::getSpilledReg() provides a VGPR lane, it is read
/// with V_READLANE_B32. Otherwise the value is loaded into a new VGPR via an
/// SI_SPILL_V32_RESTORE pseudo and moved to the SGPR with
/// V_READFIRSTLANE_B32. A restore of M0 goes through a temporary SGPR that is
/// copied into M0 at the end. \p RS is not used.
void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                 int Index,
                                 RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = MI->getDebugLoc();

  unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
  unsigned SuperReg = MI->getOperand(0).getReg();

  // m0 is not allowed with readlane/writelane, so a temporary SGPR and an
  // extra copy are needed.
  bool IsM0 = (SuperReg == AMDGPU::M0);
  if (IsM0) {
    assert(NumSubRegs == 1);
    SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
  }

  for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    unsigned SubReg = NumSubRegs == 1 ?
      SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));

    SIMachineFunctionInfo::SpilledReg Spill
      = MFI->getSpilledReg(MF, Index, i);

    if (Spill.hasReg()) {
      BuildMI(*MBB, MI, DL,
              TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
              SubReg)
        .addReg(Spill.VGPR)
        .addImm(Spill.Lane)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    } else {
      // Restore SGPR from a stack slot.
      // FIXME: We should use S_LOAD_DWORD here for VI.

      unsigned Align = FrameInfo.getObjectAlignment(Index);
      unsigned Size = FrameInfo.getObjectSize(Index);

      MachinePointerInfo PtrInfo
        = MachinePointerInfo::getFixedStack(*MF, Index);

      MachineMemOperand *MMO = MF->getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad, Size, Align);

      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
        .addFrameIndex(Index)                   // vaddr
        .addReg(MFI->getScratchRSrcReg())       // srsrc
        .addReg(MFI->getScratchWaveOffsetReg()) // soffset
        .addImm(i * 4)                          // offset
        .addMemOperand(MMO);
      BuildMI(*MBB, MI, DL,
              TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
        .addReg(TmpReg, RegState::Kill)
        .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
    }
  }

  // Move the restored value from the temporary SGPR into M0.
  if (IsM0 && SuperReg != AMDGPU::M0) {
    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addReg(SuperReg);
  }

  MI->eraseFromParent();
}

/// Replaces the frame index in operand \p FIOperandNum of \p MI.
///
/// SGPR spill and restore pseudos are forwarded to spillSGPR() and
/// restoreSGPR(). VGPR spill and restore pseudos are expanded with
/// buildScratchLoadStore() and then erased. For every other instruction the
/// operand becomes the object's frame offset as an immediate, or a VGPR
/// holding that offset when the immediate is not legal for the operand.
/// \p SPAdj is not used.
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                        int SPAdj, unsigned FIOperandNum,
                                        RegScavenger *RS) const {
  MachineFunction *MF = MI->getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *MBB = MI->getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand &FIOp = MI->getOperand(FIOperandNum);
  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
    // SGPR register spill
    case AMDGPU::SI_SPILL_S512_SAVE:
    case AMDGPU::SI_SPILL_S256_SAVE:
    case AMDGPU::SI_SPILL_S128_SAVE:
    case AMDGPU::SI_SPILL_S64_SAVE:
    case AMDGPU::SI_SPILL_S32_SAVE: {
      spillSGPR(MI, Index, RS);
      break;
    }

    // SGPR register restore
    case AMDGPU::SI_SPILL_S512_RESTORE:
    case AMDGPU::SI_SPILL_S256_RESTORE:
    case AMDGPU::SI_SPILL_S128_RESTORE:
    case AMDGPU::SI_SPILL_S64_RESTORE:
    case AMDGPU::SI_SPILL_S32_RESTORE: {
      restoreSGPR(MI, Index, RS);
      break;
    }

    // VGPR register spill
    case AMDGPU::SI_SPILL_V512_SAVE:
    case AMDGPU::SI_SPILL_V256_SAVE:
    case AMDGPU::SI_SPILL_V128_SAVE:
    case AMDGPU::SI_SPILL_V96_SAVE:
    case AMDGPU::SI_SPILL_V64_SAVE:
    case AMDGPU::SI_SPILL_V32_SAVE:
      // The byte offset is the object's frame offset plus the pseudo's own
      // immediate offset.
      buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            FrameInfo.getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    // VGPR register restore
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE: {
      buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
            TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
            TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
            FrameInfo.getObjectOffset(Index) +
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      // Try the frame offset as an immediate first; fall back to a VGPR
      // holding the offset if the instruction cannot encode it.
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
        BuildMI(*MBB, MI, MI->getDebugLoc(),
                TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        // NOTE(review): the trailing `true` is presumably the isKill flag of
        // MachineOperand::ChangeToRegister -- confirm.
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

/// Returns the first class in BaseClasses that contains the physical register
/// \p Reg, or nullptr if none of them does. \p Reg must not be a virtual
/// register.
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
  assert(!TargetRegisterInfo::isVirtualRegister(Reg));

  // Searched in order; the first class containing Reg wins.
  static const TargetRegisterClass *const BaseClasses[] = {
    &AMDGPU::VGPR_32RegClass,
    &AMDGPU::SReg_32RegClass,
    &AMDGPU::VReg_64RegClass,
    &AMDGPU::SReg_64RegClass,
    &AMDGPU::VReg_96RegClass,
    &AMDGPU::VReg_128RegClass,
    &AMDGPU::SReg_128RegClass,
    &AMDGPU::VReg_256RegClass,
    &AMDGPU::SReg_256RegClass,
    &AMDGPU::VReg_512RegClass,
    &AMDGPU::SReg_512RegClass,
    &AMDGPU::SCC_CLASSRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  return nullptr;
}

// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
677 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { 678 switch (RC->getSize()) { 679 case 0: return false; 680 case 1: return false; 681 case 4: 682 return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; 683 case 8: 684 return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; 685 case 12: 686 return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; 687 case 16: 688 return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; 689 case 32: 690 return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; 691 case 64: 692 return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; 693 default: 694 llvm_unreachable("Invalid register class size"); 695 } 696 } 697 698 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( 699 const TargetRegisterClass *SRC) const { 700 switch (SRC->getSize()) { 701 case 4: 702 return &AMDGPU::VGPR_32RegClass; 703 case 8: 704 return &AMDGPU::VReg_64RegClass; 705 case 12: 706 return &AMDGPU::VReg_96RegClass; 707 case 16: 708 return &AMDGPU::VReg_128RegClass; 709 case 32: 710 return &AMDGPU::VReg_256RegClass; 711 case 64: 712 return &AMDGPU::VReg_512RegClass; 713 default: 714 llvm_unreachable("Invalid register class size"); 715 } 716 } 717 718 const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( 719 const TargetRegisterClass *VRC) const { 720 switch (VRC->getSize()) { 721 case 4: 722 return &AMDGPU::SGPR_32RegClass; 723 case 8: 724 return &AMDGPU::SReg_64RegClass; 725 case 16: 726 return &AMDGPU::SReg_128RegClass; 727 case 32: 728 return &AMDGPU::SReg_256RegClass; 729 case 64: 730 return &AMDGPU::SReg_512RegClass; 731 default: 732 llvm_unreachable("Invalid register class size"); 733 } 734 } 735 736 const TargetRegisterClass *SIRegisterInfo::getSubRegClass( 737 const TargetRegisterClass *RC, unsigned SubIdx) const { 738 if (SubIdx == AMDGPU::NoSubRegister) 739 return RC; 740 741 // We can assume that each lane corresponds to one 32-bit 
register. 742 unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx)); 743 if (isSGPRClass(RC)) { 744 switch (Count) { 745 case 1: 746 return &AMDGPU::SGPR_32RegClass; 747 case 2: 748 return &AMDGPU::SReg_64RegClass; 749 case 4: 750 return &AMDGPU::SReg_128RegClass; 751 case 8: 752 return &AMDGPU::SReg_256RegClass; 753 case 16: /* fall-through */ 754 default: 755 llvm_unreachable("Invalid sub-register class size"); 756 } 757 } else { 758 switch (Count) { 759 case 1: 760 return &AMDGPU::VGPR_32RegClass; 761 case 2: 762 return &AMDGPU::VReg_64RegClass; 763 case 3: 764 return &AMDGPU::VReg_96RegClass; 765 case 4: 766 return &AMDGPU::VReg_128RegClass; 767 case 8: 768 return &AMDGPU::VReg_256RegClass; 769 case 16: /* fall-through */ 770 default: 771 llvm_unreachable("Invalid sub-register class size"); 772 } 773 } 774 } 775 776 bool SIRegisterInfo::shouldRewriteCopySrc( 777 const TargetRegisterClass *DefRC, 778 unsigned DefSubReg, 779 const TargetRegisterClass *SrcRC, 780 unsigned SrcSubReg) const { 781 // We want to prefer the smallest register class possible, so we don't want to 782 // stop and rewrite on anything that looks like a subregister 783 // extract. Operations mostly don't care about the super register class, so we 784 // only want to stop on the most basic of copies between the same register 785 // class. 786 // 787 // e.g. if we have something like 788 // vreg0 = ... 789 // vreg1 = ... 790 // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2 791 // vreg3 = COPY vreg2, sub0 792 // 793 // We want to look through the COPY to find: 794 // => vreg3 = COPY vreg0 795 796 // Plain copy. 
797 return getCommonSubClass(DefRC, SrcRC) != nullptr; 798 } 799 800 bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const { 801 return OpType == AMDGPU::OPERAND_REG_IMM32_INT || 802 OpType == AMDGPU::OPERAND_REG_IMM32_FP; 803 } 804 805 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { 806 if (opCanUseLiteralConstant(OpType)) 807 return true; 808 809 return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT || 810 OpType == AMDGPU::OPERAND_REG_INLINE_C_FP; 811 } 812 813 // FIXME: Most of these are flexible with HSA and we don't need to reserve them 814 // as input registers if unused. Whether the dispatch ptr is necessary should be 815 // easy to detect from used intrinsics. Scratch setup is harder to know. 816 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, 817 enum PreloadedValue Value) const { 818 819 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 820 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 821 (void)ST; 822 switch (Value) { 823 case SIRegisterInfo::WORKGROUP_ID_X: 824 assert(MFI->hasWorkGroupIDX()); 825 return MFI->WorkGroupIDXSystemSGPR; 826 case SIRegisterInfo::WORKGROUP_ID_Y: 827 assert(MFI->hasWorkGroupIDY()); 828 return MFI->WorkGroupIDYSystemSGPR; 829 case SIRegisterInfo::WORKGROUP_ID_Z: 830 assert(MFI->hasWorkGroupIDZ()); 831 return MFI->WorkGroupIDZSystemSGPR; 832 case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET: 833 return MFI->PrivateSegmentWaveByteOffsetSystemSGPR; 834 case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER: 835 assert(ST.isAmdCodeObjectV2() && 836 "Non-CodeObjectV2 ABI currently uses relocations"); 837 assert(MFI->hasPrivateSegmentBuffer()); 838 return MFI->PrivateSegmentBufferUserSGPR; 839 case SIRegisterInfo::KERNARG_SEGMENT_PTR: 840 assert(MFI->hasKernargSegmentPtr()); 841 return MFI->KernargSegmentPtrUserSGPR; 842 case SIRegisterInfo::DISPATCH_ID: 843 assert(MFI->hasDispatchID()); 844 return MFI->DispatchIDUserSGPR; 845 case 
SIRegisterInfo::FLAT_SCRATCH_INIT: 846 assert(MFI->hasFlatScratchInit()); 847 return MFI->FlatScratchInitUserSGPR; 848 case SIRegisterInfo::DISPATCH_PTR: 849 assert(MFI->hasDispatchPtr()); 850 return MFI->DispatchPtrUserSGPR; 851 case SIRegisterInfo::QUEUE_PTR: 852 assert(MFI->hasQueuePtr()); 853 return MFI->QueuePtrUserSGPR; 854 case SIRegisterInfo::WORKITEM_ID_X: 855 assert(MFI->hasWorkItemIDX()); 856 return AMDGPU::VGPR0; 857 case SIRegisterInfo::WORKITEM_ID_Y: 858 assert(MFI->hasWorkItemIDY()); 859 return AMDGPU::VGPR1; 860 case SIRegisterInfo::WORKITEM_ID_Z: 861 assert(MFI->hasWorkItemIDZ()); 862 return AMDGPU::VGPR2; 863 } 864 llvm_unreachable("unexpected preloaded value type"); 865 } 866 867 /// \brief Returns a register that is not used at any point in the function. 868 /// If all registers are used, then this function will return 869 // AMDGPU::NoRegister. 870 unsigned 871 SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, 872 const TargetRegisterClass *RC, 873 const MachineFunction &MF) const { 874 875 for (unsigned Reg : *RC) 876 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) 877 return Reg; 878 return AMDGPU::NoRegister; 879 } 880 881 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, 882 unsigned Reg) const { 883 const TargetRegisterClass *RC; 884 if (TargetRegisterInfo::isVirtualRegister(Reg)) 885 RC = MRI.getRegClass(Reg); 886 else 887 RC = getPhysRegClass(Reg); 888 889 return hasVGPRs(RC); 890 } 891 892 unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const { 893 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 894 return 800; 895 return 512; 896 } 897 898 unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const { 899 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 900 return 102; 901 return 104; 902 } 903 904 unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST) const { 905 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 906 
return 6; // VCC, FLAT_SCRATCH, XNACK. 907 return 2; // VCC. 908 } 909 910 unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST, 911 unsigned WavesPerEU) const { 912 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 913 switch (WavesPerEU) { 914 case 0: return 0; 915 case 10: return 0; 916 case 9: return 0; 917 case 8: return 81; 918 default: return 97; 919 } 920 } else { 921 switch (WavesPerEU) { 922 case 0: return 0; 923 case 10: return 0; 924 case 9: return 49; 925 case 8: return 57; 926 case 7: return 65; 927 case 6: return 73; 928 case 5: return 81; 929 default: return 97; 930 } 931 } 932 } 933 934 unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST, 935 unsigned WavesPerEU) const { 936 if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { 937 switch (WavesPerEU) { 938 case 0: return 80; 939 case 10: return 80; 940 case 9: return 80; 941 case 8: return 96; 942 default: return getNumAddressableSGPRs(ST); 943 } 944 } else { 945 switch (WavesPerEU) { 946 case 0: return 48; 947 case 10: return 48; 948 case 9: return 56; 949 case 8: return 64; 950 case 7: return 72; 951 case 6: return 80; 952 case 5: return 96; 953 default: return getNumAddressableSGPRs(ST); 954 } 955 } 956 } 957 958 unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const { 959 const Function &F = *MF.getFunction(); 960 961 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 962 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 963 964 // Compute maximum number of SGPRs function can use using default/requested 965 // minimum number of waves per execution unit. 966 std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU(); 967 unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first); 968 969 // Check if maximum number of SGPRs was explicitly requested using 970 // "amdgpu-num-sgpr" attribute. 
971 if (F.hasFnAttribute("amdgpu-num-sgpr")) { 972 unsigned Requested = AMDGPU::getIntegerAttribute( 973 F, "amdgpu-num-sgpr", MaxNumSGPRs); 974 975 // Make sure requested value does not violate subtarget's specifications. 976 if (Requested && Requested <= getNumReservedSGPRs(ST)) 977 Requested = 0; 978 979 // Make sure requested value is compatible with values implied by 980 // default/requested minimum/maximum number of waves per execution unit. 981 if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first)) 982 Requested = 0; 983 if (WavesPerEU.second && 984 Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second)) 985 Requested = 0; 986 987 if (Requested) 988 MaxNumSGPRs = Requested; 989 } 990 991 if (ST.hasSGPRInitBug()) 992 MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; 993 994 return MaxNumSGPRs - getNumReservedSGPRs(ST); 995 } 996 997 unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs( 998 const SISubtarget &ST) const { 999 if (ST.debuggerReserveRegs()) 1000 return 4; 1001 return 0; 1002 } 1003 1004 unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const { 1005 switch (WavesPerEU) { 1006 case 0: return 0; 1007 case 10: return 0; 1008 case 9: return 25; 1009 case 8: return 29; 1010 case 7: return 33; 1011 case 6: return 37; 1012 case 5: return 41; 1013 case 4: return 49; 1014 case 3: return 65; 1015 case 2: return 85; 1016 default: return 129; 1017 } 1018 } 1019 1020 unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const { 1021 switch (WavesPerEU) { 1022 case 0: return 24; 1023 case 10: return 24; 1024 case 9: return 28; 1025 case 8: return 32; 1026 case 7: return 36; 1027 case 6: return 40; 1028 case 5: return 48; 1029 case 4: return 64; 1030 case 3: return 84; 1031 case 2: return 128; 1032 default: return getTotalNumVGPRs(); 1033 } 1034 } 1035 1036 unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const { 1037 const Function &F = *MF.getFunction(); 1038 1039 const SISubtarget &ST = 
MF.getSubtarget<SISubtarget>(); 1040 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); 1041 1042 // Compute maximum number of VGPRs function can use using default/requested 1043 // minimum number of waves per execution unit. 1044 std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU(); 1045 unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first); 1046 1047 // Check if maximum number of VGPRs was explicitly requested using 1048 // "amdgpu-num-vgpr" attribute. 1049 if (F.hasFnAttribute("amdgpu-num-vgpr")) { 1050 unsigned Requested = AMDGPU::getIntegerAttribute( 1051 F, "amdgpu-num-vgpr", MaxNumVGPRs); 1052 1053 // Make sure requested value does not violate subtarget's specifications. 1054 if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST)) 1055 Requested = 0; 1056 1057 // Make sure requested value is compatible with values implied by 1058 // default/requested minimum/maximum number of waves per execution unit. 1059 if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first)) 1060 Requested = 0; 1061 if (WavesPerEU.second && 1062 Requested && Requested < getMinNumVGPRs(WavesPerEU.second)) 1063 Requested = 0; 1064 1065 if (Requested) 1066 MaxNumVGPRs = Requested; 1067 } 1068 1069 return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST); 1070 } 1071