1 //===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// SI implementation of the TargetRegisterInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "SIRegisterInfo.h" 15 #include "AMDGPU.h" 16 #include "AMDGPURegisterBankInfo.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUInstPrinter.h" 19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 20 #include "SIMachineFunctionInfo.h" 21 #include "llvm/CodeGen/LiveIntervals.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/RegisterScavenging.h" 24 25 using namespace llvm; 26 27 #define GET_REGINFO_TARGET_DESC 28 #include "AMDGPUGenRegisterInfo.inc" 29 30 static cl::opt<bool> EnableSpillSGPRToVGPR( 31 "amdgpu-spill-sgpr-to-vgpr", 32 cl::desc("Enable spilling VGPRs to SGPRs"), 33 cl::ReallyHidden, 34 cl::init(true)); 35 36 std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts; 37 std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable; 38 39 // Map numbers of DWORDs to indexes in SubRegFromChannelTable. 40 // Valid indexes are shifted 1, such that a 0 mapping means unsupported. 41 // e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8, 42 // meaning index 7 in SubRegFromChannelTable. 43 static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = { 44 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9}; 45 46 namespace llvm { 47 48 // A temporary struct to spill SGPRs. 49 // This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits 50 // just v_writelane and v_readlane. 
51 // 52 // When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR 53 // is saved to scratch (or the other way around for loads). 54 // For this, a VGPR is required where the needed lanes can be clobbered. The 55 // RegScavenger can provide a VGPR where currently active lanes can be 56 // clobbered, but we still need to save inactive lanes. 57 // The high-level steps are: 58 // - Try to scavenge SGPR(s) to save exec 59 // - Try to scavenge VGPR 60 // - Save needed, all or inactive lanes of a TmpVGPR 61 // - Spill/Restore SGPRs using TmpVGPR 62 // - Restore TmpVGPR 63 // 64 // To save all lanes of TmpVGPR, exec needs to be saved and modified. If we 65 // cannot scavenge temporary SGPRs to save exec, we use the following code: 66 // buffer_store_dword TmpVGPR ; only if active lanes need to be saved 67 // s_not exec, exec 68 // buffer_store_dword TmpVGPR ; save inactive lanes 69 // s_not exec, exec 70 struct SGPRSpillBuilder { 71 struct PerVGPRData { 72 unsigned PerVGPR; 73 unsigned NumVGPRs; 74 int64_t VGPRLanes; 75 }; 76 77 // The SGPR to save 78 Register SuperReg; 79 MachineBasicBlock::iterator MI; 80 ArrayRef<int16_t> SplitParts; 81 unsigned NumSubRegs; 82 bool IsKill; 83 const DebugLoc &DL; 84 85 /* When spilling to stack */ 86 // The SGPRs are written into this VGPR, which is then written to scratch 87 // (or vice versa for loads). 88 Register TmpVGPR = AMDGPU::NoRegister; 89 // Temporary spill slot to save TmpVGPR to. 90 int TmpVGPRIndex = 0; 91 // If TmpVGPR is live before the spill or if it is scavenged. 92 bool TmpVGPRLive = false; 93 // Scavenged SGPR to save EXEC. 94 Register SavedExecReg = AMDGPU::NoRegister; 95 // Stack index to write the SGPRs to. 
96 int Index; 97 unsigned EltSize = 4; 98 99 RegScavenger *RS; 100 MachineBasicBlock &MBB; 101 MachineFunction &MF; 102 SIMachineFunctionInfo &MFI; 103 const SIInstrInfo &TII; 104 const SIRegisterInfo &TRI; 105 bool IsWave32; 106 Register ExecReg; 107 unsigned MovOpc; 108 unsigned NotOpc; 109 110 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, 111 bool IsWave32, MachineBasicBlock::iterator MI, int Index, 112 RegScavenger *RS) 113 : SuperReg(MI->getOperand(0).getReg()), MI(MI), 114 IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index), 115 RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()), 116 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI), 117 IsWave32(IsWave32) { 118 const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); 119 SplitParts = TRI.getRegSplitParts(RC, EltSize); 120 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); 121 122 if (IsWave32) { 123 ExecReg = AMDGPU::EXEC_LO; 124 MovOpc = AMDGPU::S_MOV_B32; 125 NotOpc = AMDGPU::S_NOT_B32; 126 } else { 127 ExecReg = AMDGPU::EXEC; 128 MovOpc = AMDGPU::S_MOV_B64; 129 NotOpc = AMDGPU::S_NOT_B64; 130 } 131 132 assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); 133 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && 134 SuperReg != AMDGPU::EXEC && "exec should never spill"); 135 } 136 137 PerVGPRData getPerVGPRData() { 138 PerVGPRData Data; 139 Data.PerVGPR = IsWave32 ? 32 : 64; 140 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR; 141 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL; 142 return Data; 143 } 144 145 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is 146 // free. 
147 // Writes these instructions if an SGPR can be scavenged: 148 // s_mov_b64 s[6:7], exec ; Save exec 149 // s_mov_b64 exec, 3 ; Wanted lanemask 150 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot 151 // 152 // Writes these instructions if no SGPR can be scavenged: 153 // buffer_store_dword v0 ; Only if no free VGPR was found 154 // s_not_b64 exec, exec 155 // buffer_store_dword v0 ; Save inactive lanes 156 // ; exec stays inverted, it is flipped back in 157 // ; restore. 158 void prepare() { 159 // Scavenged temporary VGPR to use. It must be scavenged once for any number 160 // of spilled subregs. 161 // FIXME: The liveness analysis is limited and does not tell if a register 162 // is in use in lanes that are currently inactive. We can never be sure if 163 // a register as actually in use in another lane, so we need to save all 164 // used lanes of the chosen VGPR. 165 assert(RS && "Cannot spill SGPR to memory without RegScavenger"); 166 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); 167 168 // Reserve temporary stack slot 169 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI); 170 if (TmpVGPR) { 171 // Found a register that is dead in the currently active lanes, we only 172 // need to spill inactive lanes. 173 TmpVGPRLive = false; 174 } else { 175 // Pick v0 because it doesn't make a difference. 176 TmpVGPR = AMDGPU::VGPR0; 177 TmpVGPRLive = true; 178 } 179 180 // Try to scavenge SGPRs to save exec 181 assert(!SavedExecReg && "Exec is already saved, refuse to save again"); 182 const TargetRegisterClass &RC = 183 IsWave32 ? 
AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass; 184 RS->setRegUsed(SuperReg); 185 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false); 186 187 int64_t VGPRLanes = getPerVGPRData().VGPRLanes; 188 189 if (SavedExecReg) { 190 RS->setRegUsed(SavedExecReg); 191 // Set exec to needed lanes 192 BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); 193 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes); 194 if (!TmpVGPRLive) 195 I.addReg(TmpVGPR, RegState::ImplicitDefine); 196 // Spill needed lanes 197 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); 198 } else { 199 // Spill active lanes 200 if (TmpVGPRLive) 201 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false, 202 /*IsKill*/ false); 203 // Spill inactive lanes 204 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); 205 if (!TmpVGPRLive) 206 I.addReg(TmpVGPR, RegState::ImplicitDefine); 207 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); 208 } 209 } 210 211 // Writes these instructions if an SGPR can be scavenged: 212 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot 213 // s_waitcnt vmcnt(0) ; If a free VGPR was found 214 // s_mov_b64 exec, s[6:7] ; Save exec 215 // 216 // Writes these instructions if no SGPR can be scavenged: 217 // buffer_load_dword v0 ; Restore inactive lanes 218 // s_waitcnt vmcnt(0) ; If a free VGPR was found 219 // s_not_b64 exec, exec 220 // buffer_load_dword v0 ; Only if no free VGPR was found 221 void restore() { 222 if (SavedExecReg) { 223 // Restore used lanes 224 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, 225 /*IsKill*/ false); 226 // Restore exec 227 auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg) 228 .addReg(SavedExecReg, RegState::Kill); 229 // Add an implicit use of the load so it is not dead. 
230 // FIXME This inserts an unnecessary waitcnt 231 if (!TmpVGPRLive) { 232 I.addReg(TmpVGPR, RegState::ImplicitKill); 233 } 234 } else { 235 // Restore inactive lanes 236 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, 237 /*IsKill*/ false); 238 auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); 239 if (!TmpVGPRLive) { 240 I.addReg(TmpVGPR, RegState::ImplicitKill); 241 } 242 // Restore active lanes 243 if (TmpVGPRLive) 244 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true); 245 } 246 } 247 248 // Write TmpVGPR to memory or read TmpVGPR from memory. 249 // Either using a single buffer_load/store if exec is set to the needed mask 250 // or using 251 // buffer_load 252 // s_not exec, exec 253 // buffer_load 254 // s_not exec, exec 255 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) { 256 if (SavedExecReg) { 257 // Spill needed lanes 258 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); 259 } else { 260 // Spill active lanes 261 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad, 262 /*IsKill*/ false); 263 // Spill inactive lanes 264 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); 265 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); 266 BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); 267 } 268 } 269 }; 270 271 } // namespace llvm 272 273 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) 274 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), 275 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { 276 277 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 && 278 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) && 279 (getSubRegIndexLaneMask(AMDGPU::lo16) | 280 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() == 281 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() && 282 "getNumCoveredRegs() will not work with generated subreg masks!"); 283 284 
RegPressureIgnoredUnits.resize(getNumRegUnits()); 285 RegPressureIgnoredUnits.set( 286 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this)); 287 for (auto Reg : AMDGPU::VGPR_HI16RegClass) 288 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this)); 289 290 // HACK: Until this is fully tablegen'd. 291 static llvm::once_flag InitializeRegSplitPartsFlag; 292 293 static auto InitializeRegSplitPartsOnce = [this]() { 294 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) { 295 unsigned Size = getSubRegIdxSize(Idx); 296 if (Size & 31) 297 continue; 298 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1]; 299 unsigned Pos = getSubRegIdxOffset(Idx); 300 if (Pos % Size) 301 continue; 302 Pos /= Size; 303 if (Vec.empty()) { 304 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits. 305 Vec.resize(MaxNumParts); 306 } 307 Vec[Pos] = Idx; 308 } 309 }; 310 311 static llvm::once_flag InitializeSubRegFromChannelTableFlag; 312 313 static auto InitializeSubRegFromChannelTableOnce = [this]() { 314 for (auto &Row : SubRegFromChannelTable) 315 Row.fill(AMDGPU::NoSubRegister); 316 for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) { 317 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32; 318 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32; 319 assert(Width < SubRegFromChannelTableWidthMap.size()); 320 Width = SubRegFromChannelTableWidthMap[Width]; 321 if (Width == 0) 322 continue; 323 unsigned TableIdx = Width - 1; 324 assert(TableIdx < SubRegFromChannelTable.size()); 325 assert(Offset < SubRegFromChannelTable[TableIdx].size()); 326 SubRegFromChannelTable[TableIdx][Offset] = Idx; 327 } 328 }; 329 330 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce); 331 llvm::call_once(InitializeSubRegFromChannelTableFlag, 332 InitializeSubRegFromChannelTableOnce); 333 } 334 335 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, 336 MCRegister Reg) const { 337 MCRegAliasIterator R(Reg, this, true); 338 
339 for (; R.isValid(); ++R) 340 Reserved.set(*R); 341 } 342 343 // Forced to be here by one .inc 344 const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( 345 const MachineFunction *MF) const { 346 CallingConv::ID CC = MF->getFunction().getCallingConv(); 347 switch (CC) { 348 case CallingConv::C: 349 case CallingConv::Fast: 350 case CallingConv::Cold: 351 case CallingConv::AMDGPU_Gfx: 352 return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts() 353 ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList 354 : CSR_AMDGPU_HighRegs_SaveList; 355 default: { 356 // Dummy to not crash RegisterClassInfo. 357 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister; 358 return &NoCalleeSavedReg; 359 } 360 } 361 } 362 363 const MCPhysReg * 364 SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { 365 return nullptr; 366 } 367 368 const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, 369 CallingConv::ID CC) const { 370 switch (CC) { 371 case CallingConv::C: 372 case CallingConv::Fast: 373 case CallingConv::Cold: 374 case CallingConv::AMDGPU_Gfx: 375 return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts() 376 ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask 377 : CSR_AMDGPU_HighRegs_RegMask; 378 default: 379 return nullptr; 380 } 381 } 382 383 const uint32_t *SIRegisterInfo::getNoPreservedMask() const { 384 return CSR_AMDGPU_NoRegs_RegMask; 385 } 386 387 Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { 388 const SIFrameLowering *TFI = 389 MF.getSubtarget<GCNSubtarget>().getFrameLowering(); 390 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); 391 // During ISel lowering we always reserve the stack pointer in entry 392 // functions, but never actually want to reference it when accessing our own 393 // frame. If we need a frame pointer we use it, but otherwise we can just use 394 // an immediate "0" which we represent by returning NoRegister. 
395 if (FuncInfo->isEntryFunction()) { 396 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register(); 397 } 398 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() 399 : FuncInfo->getStackPtrOffsetReg(); 400 } 401 402 bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const { 403 // When we need stack realignment, we can't reference off of the 404 // stack pointer, so we reserve a base pointer. 405 const MachineFrameInfo &MFI = MF.getFrameInfo(); 406 return MFI.getNumFixedObjects() && shouldRealignStack(MF); 407 } 408 409 Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; } 410 411 const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const { 412 return CSR_AMDGPU_AllVGPRs_RegMask; 413 } 414 415 const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const { 416 return CSR_AMDGPU_AllAGPRs_RegMask; 417 } 418 419 const uint32_t *SIRegisterInfo::getAllVectorRegMask() const { 420 return CSR_AMDGPU_AllVectorRegs_RegMask; 421 } 422 423 const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const { 424 return CSR_AMDGPU_AllAllocatableSRegs_RegMask; 425 } 426 427 unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel, 428 unsigned NumRegs) { 429 assert(NumRegs < SubRegFromChannelTableWidthMap.size()); 430 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs]; 431 assert(NumRegIndex && "Not implemented"); 432 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size()); 433 return SubRegFromChannelTable[NumRegIndex - 1][Channel]; 434 } 435 436 MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg( 437 const MachineFunction &MF) const { 438 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4; 439 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); 440 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass); 441 } 442 443 BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { 444 BitVector Reserved(getNumRegs()); 445 
Reserved.set(AMDGPU::MODE); 446 447 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but 448 // this seems likely to result in bugs, so I'm marking them as reserved. 449 reserveRegisterTuples(Reserved, AMDGPU::EXEC); 450 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); 451 452 // M0 has to be reserved so that llvm accepts it as a live-in into a block. 453 reserveRegisterTuples(Reserved, AMDGPU::M0); 454 455 // Reserve src_vccz, src_execz, src_scc. 456 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ); 457 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ); 458 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC); 459 460 // Reserve the memory aperture registers. 461 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); 462 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); 463 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE); 464 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT); 465 466 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen. 467 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID); 468 469 // Reserve xnack_mask registers - support is not implemented in Codegen. 470 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK); 471 472 // Reserve lds_direct register - support is not implemented in Codegen. 473 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT); 474 475 // Reserve Trap Handler registers - support is not implemented in Codegen. 
476 reserveRegisterTuples(Reserved, AMDGPU::TBA); 477 reserveRegisterTuples(Reserved, AMDGPU::TMA); 478 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1); 479 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3); 480 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5); 481 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7); 482 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9); 483 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11); 484 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13); 485 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15); 486 487 // Reserve null register - it shall never be allocated 488 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL); 489 490 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely 491 // will result in bugs. 492 if (isWave32) { 493 Reserved.set(AMDGPU::VCC); 494 Reserved.set(AMDGPU::VCC_HI); 495 } 496 497 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF); 498 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); 499 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) { 500 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i); 501 reserveRegisterTuples(Reserved, Reg); 502 } 503 504 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); 505 // TODO: In an entry function without calls and AGPRs used it is possible 506 // to use the whole register budget for VGPRs. Even more it shall 507 // be possible to estimate maximum AGPR/VGPR pressure and split 508 // register file accordingly. 
509 if (ST.hasGFX90AInsts()) 510 MaxNumVGPRs /= 2; 511 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); 512 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) { 513 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i); 514 reserveRegisterTuples(Reserved, Reg); 515 Reg = AMDGPU::AGPR_32RegClass.getRegister(i); 516 reserveRegisterTuples(Reserved, Reg); 517 } 518 519 for (auto Reg : AMDGPU::SReg_32RegClass) { 520 Reserved.set(getSubReg(Reg, AMDGPU::hi16)); 521 Register Low = getSubReg(Reg, AMDGPU::lo16); 522 // This is to prevent BB vcc liveness errors. 523 if (!AMDGPU::SGPR_LO16RegClass.contains(Low)) 524 Reserved.set(Low); 525 } 526 527 for (auto Reg : AMDGPU::AGPR_32RegClass) { 528 Reserved.set(getSubReg(Reg, AMDGPU::hi16)); 529 } 530 531 // Reserve all the rest AGPRs if there are no instructions to use it. 532 if (!ST.hasMAIInsts()) { 533 for (unsigned i = 0; i < MaxNumVGPRs; ++i) { 534 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i); 535 reserveRegisterTuples(Reserved, Reg); 536 } 537 } 538 539 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 540 541 Register ScratchRSrcReg = MFI->getScratchRSrcReg(); 542 if (ScratchRSrcReg != AMDGPU::NoRegister) { 543 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need 544 // to spill. 545 // TODO: May need to reserve a VGPR if doing LDS spilling. 546 reserveRegisterTuples(Reserved, ScratchRSrcReg); 547 } 548 549 // We have to assume the SP is needed in case there are calls in the function, 550 // which is detected after the function is lowered. If we aren't really going 551 // to need SP, don't bother reserving it. 
552 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg(); 553 554 if (StackPtrReg) { 555 reserveRegisterTuples(Reserved, StackPtrReg); 556 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); 557 } 558 559 MCRegister FrameReg = MFI->getFrameOffsetReg(); 560 if (FrameReg) { 561 reserveRegisterTuples(Reserved, FrameReg); 562 assert(!isSubRegister(ScratchRSrcReg, FrameReg)); 563 } 564 565 if (hasBasePointer(MF)) { 566 MCRegister BasePtrReg = getBaseRegister(); 567 reserveRegisterTuples(Reserved, BasePtrReg); 568 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg)); 569 } 570 571 for (auto Reg : MFI->WWMReservedRegs) { 572 reserveRegisterTuples(Reserved, Reg.first); 573 } 574 575 // FIXME: Stop using reserved registers for this. 576 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs()) 577 reserveRegisterTuples(Reserved, Reg); 578 579 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs()) 580 reserveRegisterTuples(Reserved, Reg); 581 582 for (auto SSpill : MFI->getSGPRSpillVGPRs()) 583 reserveRegisterTuples(Reserved, SSpill.VGPR); 584 585 return Reserved; 586 } 587 588 bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const { 589 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 590 // On entry, the base address is 0, so it can't possibly need any more 591 // alignment. 592 593 // FIXME: Should be able to specify the entry frame alignment per calling 594 // convention instead. 595 if (Info->isEntryFunction()) 596 return false; 597 598 return TargetRegisterInfo::shouldRealignStack(MF); 599 } 600 601 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { 602 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>(); 603 if (Info->isEntryFunction()) { 604 const MachineFrameInfo &MFI = Fn.getFrameInfo(); 605 return MFI.hasStackObjects() || MFI.hasCalls(); 606 } 607 608 // May need scavenger for dealing with callee saved registers. 
609 return true; 610 } 611 612 bool SIRegisterInfo::requiresFrameIndexScavenging( 613 const MachineFunction &MF) const { 614 // Do not use frame virtual registers. They used to be used for SGPRs, but 615 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the 616 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a 617 // spill. 618 return false; 619 } 620 621 bool SIRegisterInfo::requiresFrameIndexReplacementScavenging( 622 const MachineFunction &MF) const { 623 const MachineFrameInfo &MFI = MF.getFrameInfo(); 624 return MFI.hasStackObjects(); 625 } 626 627 bool SIRegisterInfo::requiresVirtualBaseRegisters( 628 const MachineFunction &) const { 629 // There are no special dedicated stack or frame pointers. 630 return true; 631 } 632 633 int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const { 634 assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI)); 635 636 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), 637 AMDGPU::OpName::offset); 638 return MI->getOperand(OffIdx).getImm(); 639 } 640 641 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI, 642 int Idx) const { 643 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI)) 644 return 0; 645 646 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(), 647 AMDGPU::OpName::vaddr) || 648 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(), 649 AMDGPU::OpName::saddr))) && 650 "Should never see frame index on non-address operand"); 651 652 return getScratchInstrOffset(MI); 653 } 654 655 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { 656 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI)) 657 return false; 658 659 int64_t FullOffset = Offset + getScratchInstrOffset(MI); 660 661 if (SIInstrInfo::isMUBUF(*MI)) 662 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset); 663 664 const SIInstrInfo *TII = ST.getInstrInfo(); 665 return 
!TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS, 666 SIInstrFlags::FlatScratch); 667 } 668 669 Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, 670 int FrameIdx, 671 int64_t Offset) const { 672 MachineBasicBlock::iterator Ins = MBB->begin(); 673 DebugLoc DL; // Defaults to "unknown" 674 675 if (Ins != MBB->end()) 676 DL = Ins->getDebugLoc(); 677 678 MachineFunction *MF = MBB->getParent(); 679 const SIInstrInfo *TII = ST.getInstrInfo(); 680 MachineRegisterInfo &MRI = MF->getRegInfo(); 681 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32 682 : AMDGPU::V_MOV_B32_e32; 683 684 Register BaseReg = MRI.createVirtualRegister( 685 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass 686 : &AMDGPU::VGPR_32RegClass); 687 688 if (Offset == 0) { 689 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg) 690 .addFrameIndex(FrameIdx); 691 return BaseReg; 692 } 693 694 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); 695 696 Register FIReg = MRI.createVirtualRegister( 697 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass 698 : &AMDGPU::VGPR_32RegClass); 699 700 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) 701 .addImm(Offset); 702 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg) 703 .addFrameIndex(FrameIdx); 704 705 if (ST.enableFlatScratch() ) { 706 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg) 707 .addReg(OffsetReg, RegState::Kill) 708 .addReg(FIReg); 709 return BaseReg; 710 } 711 712 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg) 713 .addReg(OffsetReg, RegState::Kill) 714 .addReg(FIReg) 715 .addImm(0); // clamp bit 716 717 return BaseReg; 718 } 719 720 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg, 721 int64_t Offset) const { 722 const SIInstrInfo *TII = ST.getInstrInfo(); 723 bool IsFlat = TII->isFLATScratch(MI); 724 725 #ifndef NDEBUG 726 // FIXME: Is it possible to be storing a frame index to itself? 
727 bool SeenFI = false; 728 for (const MachineOperand &MO: MI.operands()) { 729 if (MO.isFI()) { 730 if (SeenFI) 731 llvm_unreachable("should not see multiple frame indices"); 732 733 SeenFI = true; 734 } 735 } 736 #endif 737 738 MachineOperand *FIOp = 739 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr 740 : AMDGPU::OpName::vaddr); 741 742 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); 743 int64_t NewOffset = OffsetOp->getImm() + Offset; 744 745 assert(FIOp && FIOp->isFI() && "frame index must be address operand"); 746 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI)); 747 748 if (IsFlat) { 749 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, 750 SIInstrFlags::FlatScratch) && 751 "offset should be legal"); 752 FIOp->ChangeToRegister(BaseReg, false); 753 OffsetOp->setImm(NewOffset); 754 return; 755 } 756 757 #ifndef NDEBUG 758 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset); 759 assert(SOffset->isImm() && SOffset->getImm() == 0); 760 #endif 761 762 assert(SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) && 763 "offset should be legal"); 764 765 FIOp->ChangeToRegister(BaseReg, false); 766 OffsetOp->setImm(NewOffset); 767 } 768 769 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, 770 Register BaseReg, 771 int64_t Offset) const { 772 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI)) 773 return false; 774 775 int64_t NewOffset = Offset + getScratchInstrOffset(MI); 776 777 if (SIInstrInfo::isMUBUF(*MI)) 778 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset); 779 780 const SIInstrInfo *TII = ST.getInstrInfo(); 781 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, 782 SIInstrFlags::FlatScratch); 783 } 784 785 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass( 786 const MachineFunction &MF, unsigned Kind) const { 787 // This is inaccurate. It depends on the instruction and address space. 
// The only place where we should hit this is for dealing with frame indexes /
  // private accesses, so this is correct in that case.
  return &AMDGPU::VGPR_32RegClass;
}

/// Return the number of 32-bit sub-registers moved by the spill pseudo \p Op
/// (i.e. the register width named in the opcode divided by 32). SGPR, VGPR and
/// AGPR save/restore pseudos of the same width share one answer. Any opcode
/// that is not listed here is treated as unreachable.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S1024_SAVE:
  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_V1024_SAVE:
  case AMDGPU::SI_SPILL_V1024_RESTORE:
  case AMDGPU::SI_SPILL_A1024_SAVE:
  case AMDGPU::SI_SPILL_A1024_RESTORE:
    return 32;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_V512_SAVE:
  case AMDGPU::SI_SPILL_V512_RESTORE:
  case AMDGPU::SI_SPILL_A512_SAVE:
  case AMDGPU::SI_SPILL_A512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_V256_SAVE:
  case AMDGPU::SI_SPILL_V256_RESTORE:
  case AMDGPU::SI_SPILL_A256_SAVE:
  case AMDGPU::SI_SPILL_A256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S224_SAVE:
  case AMDGPU::SI_SPILL_S224_RESTORE:
  case AMDGPU::SI_SPILL_V224_SAVE:
  case AMDGPU::SI_SPILL_V224_RESTORE:
  case AMDGPU::SI_SPILL_A224_SAVE:
  case AMDGPU::SI_SPILL_A224_RESTORE:
    return 7;
  case AMDGPU::SI_SPILL_S192_SAVE:
  case AMDGPU::SI_SPILL_S192_RESTORE:
  case AMDGPU::SI_SPILL_V192_SAVE:
  case AMDGPU::SI_SPILL_V192_RESTORE:
  case AMDGPU::SI_SPILL_A192_SAVE:
  case AMDGPU::SI_SPILL_A192_RESTORE:
    return 6;
  case AMDGPU::SI_SPILL_S160_SAVE:
  case AMDGPU::SI_SPILL_S160_RESTORE:
  case AMDGPU::SI_SPILL_V160_SAVE:
  case AMDGPU::SI_SPILL_V160_RESTORE:
  case AMDGPU::SI_SPILL_A160_SAVE:
  case AMDGPU::SI_SPILL_A160_RESTORE:
    return 5;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_V128_SAVE:
  case AMDGPU::SI_SPILL_V128_RESTORE:
  case AMDGPU::SI_SPILL_A128_SAVE:
  case AMDGPU::SI_SPILL_A128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S96_SAVE:
  case AMDGPU::SI_SPILL_S96_RESTORE:
  case AMDGPU::SI_SPILL_V96_SAVE:
  case AMDGPU::SI_SPILL_V96_RESTORE:
  case AMDGPU::SI_SPILL_A96_SAVE:
  case AMDGPU::SI_SPILL_A96_RESTORE:
    return 3;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
  case AMDGPU::SI_SPILL_V64_SAVE:
  case AMDGPU::SI_SPILL_V64_RESTORE:
  case AMDGPU::SI_SPILL_A64_SAVE:
  case AMDGPU::SI_SPILL_A64_RESTORE:
    return 2;
  case AMDGPU::SI_SPILL_S32_SAVE:
  case AMDGPU::SI_SPILL_S32_RESTORE:
  case AMDGPU::SI_SPILL_V32_SAVE:
  case AMDGPU::SI_SPILL_V32_RESTORE:
  case AMDGPU::SI_SPILL_A32_SAVE:
  case AMDGPU::SI_SPILL_A32_RESTORE:
    return 1;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

/// Map an OFFEN-form buffer store opcode to its OFFSET-form counterpart.
/// \returns the OFFSET opcode, or -1 if \p Opc is not one of the listed
/// OFFEN stores.
static int getOffsetMUBUFStore(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
  case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
  default:
    return -1;
  }
}

/// Map an OFFEN-form buffer load opcode to its OFFSET-form counterpart.
/// \returns the OFFSET opcode, or -1 if \p Opc is not one of the listed
/// OFFEN loads.
static int getOffsetMUBUFLoad(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
  case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
    return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
  default:
    return -1;
  }
}

/// Try to satisfy one 32-bit lane of a spill/reload with a register copy
/// instead of a memory access.
///
/// Looks up the register that SIMachineFunctionInfo assigned to (\p Index,
/// \p Lane) via getVGPRToAGPRSpill. If there is none, an empty
/// MachineInstrBuilder is returned (callers test getInstr()). Otherwise a
/// V_ACCVGPR_WRITE_B32 / V_ACCVGPR_READ_B32 is inserted before \p MI; the copy
/// direction follows MI->mayStore() (store: ValueReg -> assigned register,
/// load: assigned register -> ValueReg). \p IsKill is applied to the source
/// operand of the copy.
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);

  if (Reg == AMDGPU::NoRegister)
    return MachineInstrBuilder();

  bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  // The opcode depends on both the direction and on whether the assigned
  // register is itself a VGPR: WRITE is used for a store to a non-VGPR or a
  // load from a VGPR, READ for the two remaining combinations.
  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;

  auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
                 .addReg(Src, getKillRegState(IsKill));
  MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
  return MIB;
}

// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
//
// Inserts, before \p MI, a replacement for the OFFEN buffer access \p MI: either
// a register copy produced by spillVGPRtoAGPR (lane 0 of \p Index), or the
// OFFSET form of the same access with immediate offset \p Offset. \p MI itself
// is not erased here. Returns false (and emits nothing) if \p MI's opcode has
// no OFFSET form known to getOffsetMUBUFStore / getOffsetMUBUFLoad.
// NOTE(review): the MFI parameter is not used in this body.
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineBasicBlock *MBB = MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();
  bool IsStore = MI->mayStore();

  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
    return false;

  const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
  if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
    return true;

  MachineInstrBuilder NewMI =
      BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
          .addImm(Offset)
          .addImm(0) // cpol
          .addImm(0) // tfe
          .addImm(0) // swz
          .cloneMemRefs(*MI);

  // Carry over the vdata_in operand when the original instruction has one.
  const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
                                                       AMDGPU::OpName::vdata_in);
  if (VDataIn)
    NewMI.add(*VDataIn);
  return true;
}

/// Select the SCRATCH_{LOAD,STORE}_DWORD[X2|X3|X4] opcode that moves
/// \p EltSize bytes (4, 8, 12 or 16; anything else is unreachable).
///
/// The direction (load vs. store) is taken from \p LoadStoreOp. The SADDR form
/// is chosen first and then converted with getFlatScratchInstSTfromSS when
/// \p LoadStoreOp has neither a vaddr nor an saddr operand.
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
                                          unsigned EltSize) {
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  bool UseST =
    AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
    AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;

  switch (EltSize) {
  case 4:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    break;
  case 8:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    break;
  case 12:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    break;
  case 16:
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
    break;
  default:
    llvm_unreachable("Unexpected spill load/store size!");
  }

  if (UseST)
    LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);

  return LoadStoreOp;
}

/// Emit, before \p MI, the sequence of scratch loads or stores that moves
/// \p ValueReg to/from the stack object \p Index.
///
/// \param LoadStoreOp      opcode to use; whether this is a load or a store and
///                         whether it is a flat-scratch access are both derived
///                         from it.
/// \param IsKill           kill state applied to the value register operands.
/// \param ScratchOffsetReg SGPR used as the scalar offset operand (may be no
///                         register).
/// \param InstOffset       byte offset added to the frame object's offset.
/// \param MMO              memory operand of the whole access; a per-piece
///                         operand is derived from it for each instruction.
/// \param RS, LiveRegs     at most one may be non-null; used to find a free
///                         SGPR when the offset cannot be encoded directly.
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
    MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
    RegScavenger *RS, LivePhysRegs *LiveRegs) const {
  assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");

  MachineFunction *MF = MBB.getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const MachineFrameInfo &MFI = MF->getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();

  const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
  const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);

  bool Scavenged = false;
  MCRegister SOffset = ScratchOffsetReg;

  const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
  // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
// (continuation of the body of SIRegisterInfo::buildSpillLoadStore)
  const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
  const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;

  // Always use 4 byte operations for AGPRs because we need to scavenge
  // a temporary VGPR.
  unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
  // The register is moved as NumSubRegs pieces of EltSize bytes, plus one
  // trailing piece of RemSize bytes when RegWidth is not a multiple of EltSize.
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
  // Offset of the last EltSize-sized access; used for the legality check below.
  int64_t MaxOffset = Offset + Size + RemSize - EltSize;
  // Non-zero only when SOffset had to be modified in place and must be
  // restored after the last access (see the end of this function).
  int64_t ScratchOffsetRegDelta = 0;

  if (IsFlat && EltSize > 4) {
    LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
    Desc = &TII->get(LoadStoreOp);
  }

  Align Alignment = MFI.getObjectAlign(Index);
  const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();

  assert((IsFlat || ((Offset % EltSize) == 0)) &&
         "unexpected VGPR spill offset");

  bool IsOffsetLegal =
      IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                      SIInstrFlags::FlatScratch)
             : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
  if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
    SOffset = MCRegister();

    // We currently only support spilling VGPRs to EltSize boundaries, meaning
    // we can simplify the adjustment of Offset here to just scale with
    // WavefrontSize.
    if (!IsFlat)
      Offset *= ST.getWavefrontSize();

    // We don't have access to the register scavenger if this function is called
    // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
    if (RS) {
      SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
    } else if (LiveRegs) {
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
        if (LiveRegs->available(MF->getRegInfo(), Reg)) {
          SOffset = Reg;
          break;
        }
      }
    }

    if (!SOffset) {
      // There are no free SGPRs, and we are in the process of spilling VGPRs
      // too. Since we need a VGPR in order to spill SGPRs (this is true on
      // SI/CI and on VI it is true until we implement spilling using scalar
      // stores), we have no way to free up an SGPR. Our solution here is to
      // add the offset directly to the ScratchOffset or StackPtrOffset
      // register, and then subtract the offset after the spill to return the
      // register to its original value.
      if (!ScratchOffsetReg)
        ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
      SOffset = ScratchOffsetReg;
      ScratchOffsetRegDelta = Offset;
    } else {
      Scavenged = true;
    }

    if (!SOffset)
      report_fatal_error("could not scavenge SGPR to spill in entry function");

    // Materialize the full offset in SOffset; the per-access immediate then
    // starts again from zero.
    if (ScratchOffsetReg == AMDGPU::NoRegister) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
    } else {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
          .addReg(ScratchOffsetReg)
          .addImm(Offset);
    }

    Offset = 0;
  }

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
    assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
           && "Unexpected vaddr for flat scratch with a FI operand");

    assert(ST.hasFlatScratchSTMode());
    LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
    Desc = &TII->get(LoadStoreOp);
  }

  // Temporary VGPR used to stage AGPR values; scavenged lazily on first use.
  Register TmpReg;

  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      // Trailing remainder piece: switch to the narrower access.
      EltSize = RemSize;
      LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
    }
    Desc = &TII->get(LoadStoreOp);

    unsigned NumRegs = EltSize / 4;
    Register SubReg = e == 1
            ? ValueReg
            : Register(getSubReg(ValueReg,
                                 getSubRegFromChannel(RegOffset / 4, NumRegs)));

    unsigned SOffsetRegState = 0;
    unsigned SrcDstRegState = getDefRegState(!IsStore);
    if (i + 1 == e) {
      SOffsetRegState |= getKillRegState(Scavenged);
      // The last implicit use carries the "Kill" flag.
      SrcDstRegState |= getKillRegState(IsKill);
    }

    // Make sure the whole register is defined if there are undef components by
    // adding an implicit def of the super-reg on the first instruction.
    bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
    bool NeedSuperRegImpOperand = e > 1;

    // First try to satisfy each 32-bit lane of this piece with a register copy
    // (spillVGPRtoAGPR); stop at the first lane that has no register assigned.
    unsigned Lane = RegOffset / 4;
    unsigned LaneE = (RegOffset + EltSize) / 4;
    for ( ; Lane != LaneE; ++Lane) {
      bool IsSubReg = e > 1 || EltSize > 4;
      Register Sub = IsSubReg
             ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
             : ValueReg;
      auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
      if (!MIB.getInstr())
        break;
      if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
        MIB.addReg(ValueReg, RegState::ImplicitDefine);
        NeedSuperRegDef = false;
      }
      if (IsSubReg || NeedSuperRegImpOperand) {
        NeedSuperRegImpOperand = true;
        unsigned State = SrcDstRegState;
        if (Lane + 1 != LaneE)
          State &= ~RegState::Kill;
        MIB.addReg(ValueReg, RegState::Implicit | State);
      }
    }

    if (Lane == LaneE) // Fully spilled into AGPRs.
      continue;

    // Offset in bytes from the beginning of the ValueReg to its portion we
    // still need to spill. It may differ from RegOffset if a portion of
    // current SubReg has been already spilled into AGPRs by the loop above.
    unsigned RemRegOffset = Lane * 4;
    unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
    if (RemEltSize != EltSize) { // Partially spilled to AGPRs
      assert(IsFlat && EltSize > 4);

      unsigned NumRegs = RemEltSize / 4;
      SubReg = Register(getSubReg(ValueReg,
                        getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
      unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
      Desc = &TII->get(Opc);
    }

    // Remember the real destination; SubReg may be redirected to TmpReg below.
    unsigned FinalReg = SubReg;

    if (IsAGPR) {
      assert(EltSize == 4);

      if (!TmpReg) {
        assert(RS && "Needs to have RegScavenger to spill an AGPR!");
        // FIXME: change to scavengeRegisterBackwards()
        TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
        RS->setRegUsed(TmpReg);
      }
      if (IsStore) {
        // Stage the AGPR value in TmpReg before storing it.
        auto AccRead = BuildMI(MBB, MI, DL,
                               TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpReg)
          .addReg(SubReg, getKillRegState(IsKill));
        if (NeedSuperRegDef)
          AccRead.addReg(ValueReg, RegState::ImplicitDefine);
        AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
      }
      SubReg = TmpReg;
    }

    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
    MachineMemOperand *NewMMO =
        MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
                                 commonAlignment(Alignment, RemRegOffset));

    auto MIB =
        BuildMI(MBB, MI, DL, *Desc)
            .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
    if (!IsFlat)
      MIB.addReg(FuncInfo->getScratchRSrcReg());

    if (SOffset == AMDGPU::NoRegister) {
      if (!IsFlat)
        MIB.addImm(0);
    } else {
      MIB.addReg(SOffset, SOffsetRegState);
    }
    MIB.addImm(Offset + RemRegOffset)
        .addImm(0); // cpol
    if (!IsFlat)
      MIB.addImm(0)  // tfe
          .addImm(0); // swz
    MIB.addMemOperand(NewMMO);

    if (!IsAGPR && NeedSuperRegDef)
      MIB.addReg(ValueReg, RegState::ImplicitDefine);

    if (!IsStore && TmpReg != AMDGPU::NoRegister) {
      // Reload went through TmpReg: move it into the real destination. MIB now
      // refers to this copy, so the implicit operand below is attached to it.
      MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
                    FinalReg)
                .addReg(TmpReg, RegState::Kill);
      MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
    }

    if (NeedSuperRegImpOperand)
      MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
  }

  if (ScratchOffsetRegDelta != 0) {
    // Subtract the offset we added to the ScratchOffset register.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
        .addReg(SOffset)
        .addImm(-ScratchOffsetRegDelta);
  }
}

/// Load or store SB.TmpVGPR from/to the stack object \p Index via
/// buildSpillLoadStore.
///
/// \param Offset element index within the slot; it is scaled by SB.EltSize to
///               form the byte offset passed on.
/// \param IsLoad selects a dword load (true) or a dword store (false); the
///               flat-scratch or buffer opcode is chosen from
///               ST.enableFlatScratch().
/// \param IsKill kill state for the stored register; ignored for loads.
void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
                                             int Offset, bool IsLoad,
                                             bool IsKill) const {
  // Load/store VGPR
  MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
  assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);

  Register FrameReg =
      FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
          ? getBaseRegister()
          : getFrameRegister(SB.MF);

  Align Alignment = FrameInfo.getObjectAlign(Index);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
  MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
      PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
      SB.EltSize, Alignment);

  if (IsLoad) {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                          : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
                        Offset * SB.EltSize, MMO, SB.RS);
  } else {
    unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                          : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
    buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
                        Offset * SB.EltSize, MMO, SB.RS);
    // This only ever adds one VGPR spill
    SB.MFI.addToSpilledVGPRs(1);
  }
}

/// Expand the SGPR spill pseudo \p MI for frame index \p Index.
///
/// If lanes were reserved for this index (getSGPRToVGPRSpills is non-empty),
/// each 32-bit sub-register is written to its reserved VGPR lane with
/// V_WRITELANE_B32. Otherwise the sub-registers are packed into SB.TmpVGPR and
/// that VGPR is written out to the stack slot. \p MI is erased on success.
///
/// \returns false, without changing anything, only when \p OnlyToVGPR is set
/// and no lanes were reserved; true otherwise.
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                               int Index,
                               RegScavenger *RS,
                               bool OnlyToVGPR) const {
  SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);

  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills =
      SB.MFI.getSGPRToVGPRSpills(Index);
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)
    return false;

  assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
                         SB.SuperReg != SB.MFI.getFrameOffsetReg()));

  if (SpillToVGPR) {
    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      Register SubReg =
          SB.NumSubRegs == 1
              ? SB.SuperReg
              : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];

      // Only the write of the last sub-register may kill the source.
      bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1;

      // Mark the "old value of vgpr" input undef only if this is the first sgpr
      // spill to this specific vgpr in the first basic block.
      auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
                         Spill.VGPR)
                     .addReg(SubReg, getKillRegState(UseKill))
                     .addImm(Spill.Lane)
                     .addReg(Spill.VGPR);

      if (i == 0 && SB.NumSubRegs > 1) {
        // We may be spilling a super-register which is only partially defined,
        // and need to ensure later spills think the value is defined.
1347 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1348 } 1349 1350 if (SB.NumSubRegs > 1) 1351 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit); 1352 1353 // FIXME: Since this spills to another register instead of an actual 1354 // frame index, we should delete the frame index when all references to 1355 // it are fixed. 1356 } 1357 } else { 1358 SB.prepare(); 1359 1360 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg. 1361 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); 1362 1363 // Per VGPR helper data 1364 auto PVD = SB.getPerVGPRData(); 1365 1366 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 1367 unsigned TmpVGPRFlags = RegState::Undef; 1368 1369 // Write sub registers into the VGPR 1370 for (unsigned i = Offset * PVD.PerVGPR, 1371 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 1372 i < e; ++i) { 1373 Register SubReg = 1374 SB.NumSubRegs == 1 1375 ? SB.SuperReg 1376 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 1377 1378 MachineInstrBuilder WriteLane = 1379 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), 1380 SB.TmpVGPR) 1381 .addReg(SubReg, SubKillState) 1382 .addImm(i % PVD.PerVGPR) 1383 .addReg(SB.TmpVGPR, TmpVGPRFlags); 1384 TmpVGPRFlags = 0; 1385 1386 // There could be undef components of a spilled super register. 1387 // TODO: Can we detect this and skip the spill? 1388 if (SB.NumSubRegs > 1) { 1389 // The last implicit use of the SB.SuperReg carries the "Kill" flag. 
1390 unsigned SuperKillState = 0; 1391 if (i + 1 == SB.NumSubRegs) 1392 SuperKillState |= getKillRegState(SB.IsKill); 1393 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); 1394 } 1395 } 1396 1397 // Write out VGPR 1398 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); 1399 } 1400 1401 SB.restore(); 1402 } 1403 1404 MI->eraseFromParent(); 1405 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); 1406 return true; 1407 } 1408 1409 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, 1410 int Index, 1411 RegScavenger *RS, 1412 bool OnlyToVGPR) const { 1413 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS); 1414 1415 ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills = 1416 SB.MFI.getSGPRToVGPRSpills(Index); 1417 bool SpillToVGPR = !VGPRSpills.empty(); 1418 if (OnlyToVGPR && !SpillToVGPR) 1419 return false; 1420 1421 if (SpillToVGPR) { 1422 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { 1423 Register SubReg = 1424 SB.NumSubRegs == 1 1425 ? SB.SuperReg 1426 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 1427 1428 SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; 1429 auto MIB = 1430 BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) 1431 .addReg(Spill.VGPR) 1432 .addImm(Spill.Lane); 1433 if (SB.NumSubRegs > 1 && i == 0) 1434 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1435 } 1436 } else { 1437 SB.prepare(); 1438 1439 // Per VGPR helper data 1440 auto PVD = SB.getPerVGPRData(); 1441 1442 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { 1443 // Load in VGPR data 1444 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true); 1445 1446 // Unpack lanes 1447 for (unsigned i = Offset * PVD.PerVGPR, 1448 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); 1449 i < e; ++i) { 1450 Register SubReg = 1451 SB.NumSubRegs == 1 1452 ? 
SB.SuperReg 1453 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); 1454 1455 bool LastSubReg = (i + 1 == e); 1456 auto MIB = BuildMI(SB.MBB, MI, SB.DL, 1457 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) 1458 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) 1459 .addImm(i); 1460 if (SB.NumSubRegs > 1 && i == 0) 1461 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); 1462 } 1463 } 1464 1465 SB.restore(); 1466 } 1467 1468 MI->eraseFromParent(); 1469 return true; 1470 } 1471 1472 /// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to 1473 /// a VGPR and the stack slot can be safely eliminated when all other users are 1474 /// handled. 1475 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( 1476 MachineBasicBlock::iterator MI, 1477 int FI, 1478 RegScavenger *RS) const { 1479 switch (MI->getOpcode()) { 1480 case AMDGPU::SI_SPILL_S1024_SAVE: 1481 case AMDGPU::SI_SPILL_S512_SAVE: 1482 case AMDGPU::SI_SPILL_S256_SAVE: 1483 case AMDGPU::SI_SPILL_S224_SAVE: 1484 case AMDGPU::SI_SPILL_S192_SAVE: 1485 case AMDGPU::SI_SPILL_S160_SAVE: 1486 case AMDGPU::SI_SPILL_S128_SAVE: 1487 case AMDGPU::SI_SPILL_S96_SAVE: 1488 case AMDGPU::SI_SPILL_S64_SAVE: 1489 case AMDGPU::SI_SPILL_S32_SAVE: 1490 return spillSGPR(MI, FI, RS, true); 1491 case AMDGPU::SI_SPILL_S1024_RESTORE: 1492 case AMDGPU::SI_SPILL_S512_RESTORE: 1493 case AMDGPU::SI_SPILL_S256_RESTORE: 1494 case AMDGPU::SI_SPILL_S224_RESTORE: 1495 case AMDGPU::SI_SPILL_S192_RESTORE: 1496 case AMDGPU::SI_SPILL_S160_RESTORE: 1497 case AMDGPU::SI_SPILL_S128_RESTORE: 1498 case AMDGPU::SI_SPILL_S96_RESTORE: 1499 case AMDGPU::SI_SPILL_S64_RESTORE: 1500 case AMDGPU::SI_SPILL_S32_RESTORE: 1501 return restoreSGPR(MI, FI, RS, true); 1502 default: 1503 llvm_unreachable("not an SGPR spill instruction"); 1504 } 1505 } 1506 1507 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, 1508 int SPAdj, unsigned FIOperandNum, 1509 RegScavenger *RS) const { 1510 MachineFunction *MF = 
MI->getParent()->getParent(); 1511 MachineBasicBlock *MBB = MI->getParent(); 1512 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 1513 MachineFrameInfo &FrameInfo = MF->getFrameInfo(); 1514 const SIInstrInfo *TII = ST.getInstrInfo(); 1515 DebugLoc DL = MI->getDebugLoc(); 1516 1517 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?"); 1518 1519 MachineOperand &FIOp = MI->getOperand(FIOperandNum); 1520 int Index = MI->getOperand(FIOperandNum).getIndex(); 1521 1522 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF) 1523 ? getBaseRegister() 1524 : getFrameRegister(*MF); 1525 1526 switch (MI->getOpcode()) { 1527 // SGPR register spill 1528 case AMDGPU::SI_SPILL_S1024_SAVE: 1529 case AMDGPU::SI_SPILL_S512_SAVE: 1530 case AMDGPU::SI_SPILL_S256_SAVE: 1531 case AMDGPU::SI_SPILL_S224_SAVE: 1532 case AMDGPU::SI_SPILL_S192_SAVE: 1533 case AMDGPU::SI_SPILL_S160_SAVE: 1534 case AMDGPU::SI_SPILL_S128_SAVE: 1535 case AMDGPU::SI_SPILL_S96_SAVE: 1536 case AMDGPU::SI_SPILL_S64_SAVE: 1537 case AMDGPU::SI_SPILL_S32_SAVE: { 1538 spillSGPR(MI, Index, RS); 1539 break; 1540 } 1541 1542 // SGPR register restore 1543 case AMDGPU::SI_SPILL_S1024_RESTORE: 1544 case AMDGPU::SI_SPILL_S512_RESTORE: 1545 case AMDGPU::SI_SPILL_S256_RESTORE: 1546 case AMDGPU::SI_SPILL_S224_RESTORE: 1547 case AMDGPU::SI_SPILL_S192_RESTORE: 1548 case AMDGPU::SI_SPILL_S160_RESTORE: 1549 case AMDGPU::SI_SPILL_S128_RESTORE: 1550 case AMDGPU::SI_SPILL_S96_RESTORE: 1551 case AMDGPU::SI_SPILL_S64_RESTORE: 1552 case AMDGPU::SI_SPILL_S32_RESTORE: { 1553 restoreSGPR(MI, Index, RS); 1554 break; 1555 } 1556 1557 // VGPR register spill 1558 case AMDGPU::SI_SPILL_V1024_SAVE: 1559 case AMDGPU::SI_SPILL_V512_SAVE: 1560 case AMDGPU::SI_SPILL_V256_SAVE: 1561 case AMDGPU::SI_SPILL_V224_SAVE: 1562 case AMDGPU::SI_SPILL_V192_SAVE: 1563 case AMDGPU::SI_SPILL_V160_SAVE: 1564 case AMDGPU::SI_SPILL_V128_SAVE: 1565 case AMDGPU::SI_SPILL_V96_SAVE: 1566 case AMDGPU::SI_SPILL_V64_SAVE: 
// (continuation of the VGPR/AGPR spill case labels in
// SIRegisterInfo::eliminateFrameIndex)
    case AMDGPU::SI_SPILL_V32_SAVE:
    case AMDGPU::SI_SPILL_A1024_SAVE:
    case AMDGPU::SI_SPILL_A512_SAVE:
    case AMDGPU::SI_SPILL_A256_SAVE:
    case AMDGPU::SI_SPILL_A224_SAVE:
    case AMDGPU::SI_SPILL_A192_SAVE:
    case AMDGPU::SI_SPILL_A160_SAVE:
    case AMDGPU::SI_SPILL_A128_SAVE:
    case AMDGPU::SI_SPILL_A96_SAVE:
    case AMDGPU::SI_SPILL_A64_SAVE:
    case AMDGPU::SI_SPILL_A32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
             MFI->getStackPtrOffsetReg());

      // Expand the pseudo into real dword stores, then drop the pseudo.
      unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                            : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
      auto *MBB = MI->getParent();
      buildSpillLoadStore(
          *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(), RS);
      MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
      MI->eraseFromParent();
      break;
    }
    // VGPR / AGPR register restore
    case AMDGPU::SI_SPILL_V32_RESTORE:
    case AMDGPU::SI_SPILL_V64_RESTORE:
    case AMDGPU::SI_SPILL_V96_RESTORE:
    case AMDGPU::SI_SPILL_V128_RESTORE:
    case AMDGPU::SI_SPILL_V160_RESTORE:
    case AMDGPU::SI_SPILL_V192_RESTORE:
    case AMDGPU::SI_SPILL_V224_RESTORE:
    case AMDGPU::SI_SPILL_V256_RESTORE:
    case AMDGPU::SI_SPILL_V512_RESTORE:
    case AMDGPU::SI_SPILL_V1024_RESTORE:
    case AMDGPU::SI_SPILL_A32_RESTORE:
    case AMDGPU::SI_SPILL_A64_RESTORE:
    case AMDGPU::SI_SPILL_A96_RESTORE:
    case AMDGPU::SI_SPILL_A128_RESTORE:
    case AMDGPU::SI_SPILL_A160_RESTORE:
    case AMDGPU::SI_SPILL_A192_RESTORE:
    case AMDGPU::SI_SPILL_A224_RESTORE:
    case AMDGPU::SI_SPILL_A256_RESTORE:
    case AMDGPU::SI_SPILL_A512_RESTORE:
    case AMDGPU::SI_SPILL_A1024_RESTORE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
             MFI->getStackPtrOffsetReg());

      // Expand the pseudo into real dword loads, then drop the pseudo.
      unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                            : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
      auto *MBB = MI->getParent();
      buildSpillLoadStore(
          *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(), RS);
      MI->eraseFromParent();
      break;
    }

    default: {
      // Other access to frame index
      const DebugLoc &DL = MI->getDebugLoc();

      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (ST.enableFlatScratch()) {
        if (TII->isFLATScratch(*MI)) {
          assert((int16_t)FIOperandNum ==
                 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::saddr));

          // The offset is always swizzled, just replace it
          if (FrameReg)
            FIOp.ChangeToRegister(FrameReg, false);

          if (!Offset)
            return;

          // Try to fold the object offset into the instruction's immediate.
          MachineOperand *OffsetOp =
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
          int64_t NewOffset = Offset + OffsetOp->getImm();
          if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                     SIInstrFlags::FlatScratch)) {
            OffsetOp->setImm(NewOffset);
            if (FrameReg)
              return;
            Offset = 0;
          }

          assert(!TII->getNamedOperand(*MI, AMDGPU::OpName::vaddr) &&
                 "Unexpected vaddr for flat scratch with a FI operand");

          // On GFX10 we have ST mode to use no registers for an address.
          // Otherwise we need to materialize 0 into an SGPR.
          if (!Offset && ST.hasFlatScratchSTMode()) {
            unsigned Opc = MI->getOpcode();
            unsigned NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
            MI->RemoveOperand(
                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
            MI->setDesc(TII->get(NewOpc));
            return;
          }
        }

        if (!FrameReg) {
          FIOp.ChangeToImmediate(Offset);
          if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
            return;
        }

        // We need to use register here. Check if we can use an SGPR or need
        // a VGPR.
        // M0 is only a probe operand for the legality query below; it is
        // replaced by FrameReg or a scavenged register before returning.
        FIOp.ChangeToRegister(AMDGPU::M0, false);
        bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);

        if (!Offset && FrameReg && UseSGPR) {
          FIOp.setReg(FrameReg);
          return;
        }

        const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
                                                : &AMDGPU::VGPR_32RegClass;

        Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
        FIOp.setReg(TmpReg);
        FIOp.setIsKill(true);

        // Only one of {FrameReg, Offset} contributes: a single move suffices.
        if ((!FrameReg || !Offset) && TmpReg) {
          unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
          auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
          if (FrameReg)
            MIB.addReg(FrameReg);
          else
            MIB.addImm(Offset);

          return;
        }

        // Need FrameReg + Offset; the add is done in an SGPR.
        Register TmpSReg =
            UseSGPR ? TmpReg
                    : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
                                           !UseSGPR);

        // TODO: for flat scratch another attempt can be made with a VGPR index
        //       if no SGPRs can be scavenged.
        if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
          report_fatal_error("Cannot scavenge register in FI elimination!");

        if (!TmpSReg) {
          // Use frame register and restore it after.
          TmpSReg = FrameReg;
          FIOp.setReg(FrameReg);
          FIOp.setIsKill(false);
        }

        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
            .addReg(FrameReg)
            .addImm(Offset);

        if (!UseSGPR)
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
              .addReg(TmpSReg, RegState::Kill);

        if (TmpSReg == FrameReg) {
          // Undo frame register modification.
          // Inserted after MI (std::next), unlike the instructions above.
          BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
                  FrameReg)
              .addReg(FrameReg)
              .addImm(-Offset);
        }

        return;
      }

      bool IsMUBUF = TII->isMUBUF(*MI);

      if (!IsMUBUF && !MFI->isEntryFunction()) {
        // Convert to a swizzled stack address by scaling by the wave size.
        //
        // In an entry function/kernel the offset is already swizzled.

        // For a plain V_MOV_B32 the result is written straight into its
        // destination and the mov is erased at the end of this branch.
        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32;
        Register ResultReg =
            IsCopy ? MI->getOperand(0).getReg()
                   : RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        if (Offset == 0) {
          // XXX - This never happens because of emergency scavenging slot at 0?
          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
            .addImm(ST.getWavefrontSizeLog2())
            .addReg(FrameReg);
        } else {
          if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
            // Reuse ResultReg in intermediate step.
            Register ScaledReg = ResultReg;

            // The shift is inserted before the (still operand-less) add.
            BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
                    ScaledReg)
              .addImm(ST.getWavefrontSizeLog2())
              .addReg(FrameReg);

            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;

            // TODO: Fold if use instruction is another add of a constant.
            if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
              // FIXME: This can fail
              MIB.addImm(Offset);
              MIB.addReg(ScaledReg, RegState::Kill);
              if (!IsVOP2)
                MIB.addImm(0); // clamp bit
            } else {
              assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
                     "Need to reuse carry out register");

              // Use scavenged unused carry out as offset register.
              Register ConstOffsetReg;
              if (!isWave32)
                ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
              else
                ConstOffsetReg = MIB.getReg(1);

              BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
                .addImm(Offset);
              MIB.addReg(ConstOffsetReg, RegState::Kill);
              MIB.addReg(ScaledReg, RegState::Kill);
              MIB.addImm(0); // clamp bit
            }
          } else {
            // We have to produce a carry out, and there isn't a free SGPR pair
            // for it. We can keep the whole computation on the SALU to avoid
            // clobbering an additional register at the cost of an extra mov.

            // We may have 1 free scratch SGPR even though a carry out is
            // unavailable. Only one additional mov is needed.
            Register TmpScaledReg =
                RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
            Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;

            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
              .addReg(FrameReg)
              .addImm(ST.getWavefrontSizeLog2());
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
              .addReg(ScaledReg, RegState::Kill)
              .addImm(Offset);
            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
              .addReg(ScaledReg, RegState::Kill);

            // If there were truly no free SGPRs, we need to undo everything.
            if (!TmpScaledReg.isValid()) {
              // Here ScaledReg is FrameReg itself: reverse the add, then the
              // shift, so FrameReg holds its original value again.
              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
                .addReg(ScaledReg, RegState::Kill)
                .addImm(-Offset);
              BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
                .addReg(FrameReg)
                .addImm(ST.getWavefrontSizeLog2());
            }
          }
        }

        // Don't introduce an extra copy if we're just materializing in a mov.
        if (IsCopy)
          MI->eraseFromParent();
        else
          FIOp.ChangeToRegister(ResultReg, false, false, true);
        return;
      }

      if (IsMUBUF) {
        // Disable offen so we don't need a 0 vgpr base.
        assert(static_cast<int>(FIOperandNum) ==
               AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::vaddr));

        auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
        assert((SOffset.isImm() && SOffset.getImm() == 0));

        if (FrameReg != AMDGPU::NoRegister)
          SOffset.ChangeToRegister(FrameReg, false);

        int64_t Offset = FrameInfo.getObjectOffset(Index);
        int64_t OldImm
          = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        // If the combined offset is encodable and an OFFSET form exists, the
        // replacement instruction has been emitted and MI can go away.
        if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
            buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
          MI->eraseFromParent();
          return;
        }
      }

      // If the offset is simply too big, don't convert to a scratch wave offset
      // relative index.

      FIOp.ChangeToImmediate(Offset);
      if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
        // The immediate is not legal in this operand: put it in a VGPR.
        Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
          .addImm(Offset);
        FIOp.ChangeToRegister(TmpReg, false, false, true);
      }
    }
  }
}

/// Return the assembly name of \p Reg as produced by
/// AMDGPUInstPrinter::getRegisterName.
StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
  return AMDGPUInstPrinter::getRegisterName(Reg);
}

/// Return the smallest listed VReg_N class (N from 64 to 1024) whose N is at
/// least \p BitWidth, or nullptr if \p BitWidth exceeds 1024. Note that every
/// width up to 64 maps to VReg_64 here.
static const TargetRegisterClass *
getAnyVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024RegClass;

  return nullptr;
}

/// Same mapping as getAnyVGPRClassForBitWidth, but returning the _Align2
/// variant of each VReg_N class; nullptr if \p BitWidth exceeds 1024.
static const TargetRegisterClass *
getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
  if (BitWidth <= 64)
    return &AMDGPU::VReg_64_Align2RegClass;
  if (BitWidth <= 96)
    return &AMDGPU::VReg_96_Align2RegClass;
  if (BitWidth <= 128)
    return &AMDGPU::VReg_128_Align2RegClass;
  if (BitWidth <= 160)
    return &AMDGPU::VReg_160_Align2RegClass;
  if (BitWidth <= 192)
    return &AMDGPU::VReg_192_Align2RegClass;
  if (BitWidth <= 224)
    return &AMDGPU::VReg_224_Align2RegClass;
  if (BitWidth <= 256)
    return &AMDGPU::VReg_256_Align2RegClass;
  if (BitWidth <= 512)
    return &AMDGPU::VReg_512_Align2RegClass;
  if (BitWidth <= 1024)
    return &AMDGPU::VReg_1024_Align2RegClass;

  return nullptr;
}

const
TargetRegisterClass * 1929 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const { 1930 if (BitWidth == 1) 1931 return &AMDGPU::VReg_1RegClass; 1932 if (BitWidth <= 16) 1933 return &AMDGPU::VGPR_LO16RegClass; 1934 if (BitWidth <= 32) 1935 return &AMDGPU::VGPR_32RegClass; 1936 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth) 1937 : getAnyVGPRClassForBitWidth(BitWidth); 1938 } 1939 1940 static const TargetRegisterClass * 1941 getAnyAGPRClassForBitWidth(unsigned BitWidth) { 1942 if (BitWidth <= 64) 1943 return &AMDGPU::AReg_64RegClass; 1944 if (BitWidth <= 96) 1945 return &AMDGPU::AReg_96RegClass; 1946 if (BitWidth <= 128) 1947 return &AMDGPU::AReg_128RegClass; 1948 if (BitWidth <= 160) 1949 return &AMDGPU::AReg_160RegClass; 1950 if (BitWidth <= 192) 1951 return &AMDGPU::AReg_192RegClass; 1952 if (BitWidth <= 256) 1953 return &AMDGPU::AReg_256RegClass; 1954 if (BitWidth <= 512) 1955 return &AMDGPU::AReg_512RegClass; 1956 if (BitWidth <= 1024) 1957 return &AMDGPU::AReg_1024RegClass; 1958 1959 return nullptr; 1960 } 1961 1962 static const TargetRegisterClass * 1963 getAlignedAGPRClassForBitWidth(unsigned BitWidth) { 1964 if (BitWidth <= 64) 1965 return &AMDGPU::AReg_64_Align2RegClass; 1966 if (BitWidth <= 96) 1967 return &AMDGPU::AReg_96_Align2RegClass; 1968 if (BitWidth <= 128) 1969 return &AMDGPU::AReg_128_Align2RegClass; 1970 if (BitWidth <= 160) 1971 return &AMDGPU::AReg_160_Align2RegClass; 1972 if (BitWidth <= 192) 1973 return &AMDGPU::AReg_192_Align2RegClass; 1974 if (BitWidth <= 256) 1975 return &AMDGPU::AReg_256_Align2RegClass; 1976 if (BitWidth <= 512) 1977 return &AMDGPU::AReg_512_Align2RegClass; 1978 if (BitWidth <= 1024) 1979 return &AMDGPU::AReg_1024_Align2RegClass; 1980 1981 return nullptr; 1982 } 1983 1984 const TargetRegisterClass * 1985 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const { 1986 if (BitWidth <= 16) 1987 return &AMDGPU::AGPR_LO16RegClass; 1988 if (BitWidth <= 32) 1989 return 
&AMDGPU::AGPR_32RegClass; 1990 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth) 1991 : getAnyAGPRClassForBitWidth(BitWidth); 1992 } 1993 1994 const TargetRegisterClass * 1995 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) { 1996 if (BitWidth <= 16) 1997 return &AMDGPU::SGPR_LO16RegClass; 1998 if (BitWidth <= 32) 1999 return &AMDGPU::SReg_32RegClass; 2000 if (BitWidth <= 64) 2001 return &AMDGPU::SReg_64RegClass; 2002 if (BitWidth <= 96) 2003 return &AMDGPU::SGPR_96RegClass; 2004 if (BitWidth <= 128) 2005 return &AMDGPU::SGPR_128RegClass; 2006 if (BitWidth <= 160) 2007 return &AMDGPU::SGPR_160RegClass; 2008 if (BitWidth <= 192) 2009 return &AMDGPU::SGPR_192RegClass; 2010 if (BitWidth <= 256) 2011 return &AMDGPU::SGPR_256RegClass; 2012 if (BitWidth <= 512) 2013 return &AMDGPU::SGPR_512RegClass; 2014 if (BitWidth <= 1024) 2015 return &AMDGPU::SGPR_1024RegClass; 2016 2017 return nullptr; 2018 } 2019 2020 // FIXME: This is very slow. It might be worth creating a map from physreg to 2021 // register class. 
2022 const TargetRegisterClass * 2023 SIRegisterInfo::getPhysRegClass(MCRegister Reg) const { 2024 static const TargetRegisterClass *const BaseClasses[] = { 2025 &AMDGPU::VGPR_LO16RegClass, 2026 &AMDGPU::VGPR_HI16RegClass, 2027 &AMDGPU::SReg_LO16RegClass, 2028 &AMDGPU::AGPR_LO16RegClass, 2029 &AMDGPU::VGPR_32RegClass, 2030 &AMDGPU::SReg_32RegClass, 2031 &AMDGPU::AGPR_32RegClass, 2032 &AMDGPU::AGPR_32RegClass, 2033 &AMDGPU::VReg_64_Align2RegClass, 2034 &AMDGPU::VReg_64RegClass, 2035 &AMDGPU::SReg_64RegClass, 2036 &AMDGPU::AReg_64_Align2RegClass, 2037 &AMDGPU::AReg_64RegClass, 2038 &AMDGPU::VReg_96_Align2RegClass, 2039 &AMDGPU::VReg_96RegClass, 2040 &AMDGPU::SReg_96RegClass, 2041 &AMDGPU::AReg_96_Align2RegClass, 2042 &AMDGPU::AReg_96RegClass, 2043 &AMDGPU::VReg_128_Align2RegClass, 2044 &AMDGPU::VReg_128RegClass, 2045 &AMDGPU::SReg_128RegClass, 2046 &AMDGPU::AReg_128_Align2RegClass, 2047 &AMDGPU::AReg_128RegClass, 2048 &AMDGPU::VReg_160_Align2RegClass, 2049 &AMDGPU::VReg_160RegClass, 2050 &AMDGPU::SReg_160RegClass, 2051 &AMDGPU::AReg_160_Align2RegClass, 2052 &AMDGPU::AReg_160RegClass, 2053 &AMDGPU::VReg_192_Align2RegClass, 2054 &AMDGPU::VReg_192RegClass, 2055 &AMDGPU::SReg_192RegClass, 2056 &AMDGPU::AReg_192_Align2RegClass, 2057 &AMDGPU::AReg_192RegClass, 2058 &AMDGPU::VReg_224_Align2RegClass, 2059 &AMDGPU::VReg_224RegClass, 2060 &AMDGPU::SReg_224RegClass, 2061 &AMDGPU::AReg_224_Align2RegClass, 2062 &AMDGPU::AReg_224RegClass, 2063 &AMDGPU::VReg_256_Align2RegClass, 2064 &AMDGPU::VReg_256RegClass, 2065 &AMDGPU::SReg_256RegClass, 2066 &AMDGPU::AReg_256_Align2RegClass, 2067 &AMDGPU::AReg_256RegClass, 2068 &AMDGPU::VReg_512_Align2RegClass, 2069 &AMDGPU::VReg_512RegClass, 2070 &AMDGPU::SReg_512RegClass, 2071 &AMDGPU::AReg_512_Align2RegClass, 2072 &AMDGPU::AReg_512RegClass, 2073 &AMDGPU::SReg_1024RegClass, 2074 &AMDGPU::VReg_1024_Align2RegClass, 2075 &AMDGPU::VReg_1024RegClass, 2076 &AMDGPU::AReg_1024_Align2RegClass, 2077 &AMDGPU::AReg_1024RegClass, 2078 
&AMDGPU::SCC_CLASSRegClass, 2079 &AMDGPU::Pseudo_SReg_32RegClass, 2080 &AMDGPU::Pseudo_SReg_128RegClass, 2081 }; 2082 2083 for (const TargetRegisterClass *BaseClass : BaseClasses) { 2084 if (BaseClass->contains(Reg)) { 2085 return BaseClass; 2086 } 2087 } 2088 return nullptr; 2089 } 2090 2091 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI, 2092 Register Reg) const { 2093 const TargetRegisterClass *RC; 2094 if (Reg.isVirtual()) 2095 RC = MRI.getRegClass(Reg); 2096 else 2097 RC = getPhysRegClass(Reg); 2098 return isSGPRClass(RC); 2099 } 2100 2101 // TODO: It might be helpful to have some target specific flags in 2102 // TargetRegisterClass to mark which classes are VGPRs to make this trivial. 2103 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { 2104 unsigned Size = getRegSizeInBits(*RC); 2105 if (Size == 16) { 2106 return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr || 2107 getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr; 2108 } 2109 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); 2110 if (!VRC) { 2111 assert(Size < 32 && "Invalid register class size"); 2112 return false; 2113 } 2114 return getCommonSubClass(VRC, RC) != nullptr; 2115 } 2116 2117 bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { 2118 unsigned Size = getRegSizeInBits(*RC); 2119 if (Size < 16) 2120 return false; 2121 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); 2122 if (!ARC) { 2123 assert(getVGPRClassForBitWidth(Size) && "Invalid register class size"); 2124 return false; 2125 } 2126 return getCommonSubClass(ARC, RC) != nullptr; 2127 } 2128 2129 const TargetRegisterClass * 2130 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const { 2131 unsigned Size = getRegSizeInBits(*SRC); 2132 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); 2133 assert(VRC && "Invalid register class size"); 2134 return VRC; 2135 } 2136 2137 const TargetRegisterClass * 2138 
SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const { 2139 unsigned Size = getRegSizeInBits(*SRC); 2140 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); 2141 assert(ARC && "Invalid register class size"); 2142 return ARC; 2143 } 2144 2145 const TargetRegisterClass * 2146 SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const { 2147 unsigned Size = getRegSizeInBits(*VRC); 2148 if (Size == 32) 2149 return &AMDGPU::SGPR_32RegClass; 2150 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size); 2151 assert(SRC && "Invalid register class size"); 2152 return SRC; 2153 } 2154 2155 const TargetRegisterClass *SIRegisterInfo::getSubRegClass( 2156 const TargetRegisterClass *RC, unsigned SubIdx) const { 2157 if (SubIdx == AMDGPU::NoSubRegister) 2158 return RC; 2159 2160 // We can assume that each lane corresponds to one 32-bit register. 2161 unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32; 2162 if (isSGPRClass(RC)) { 2163 if (Size == 32) 2164 RC = &AMDGPU::SGPR_32RegClass; 2165 else 2166 RC = getSGPRClassForBitWidth(Size); 2167 } else if (hasAGPRs(RC)) { 2168 RC = getAGPRClassForBitWidth(Size); 2169 } else { 2170 RC = getVGPRClassForBitWidth(Size); 2171 } 2172 assert(RC && "Invalid sub-register class size"); 2173 return RC; 2174 } 2175 2176 const TargetRegisterClass * 2177 SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, 2178 const TargetRegisterClass *SubRC, 2179 unsigned SubIdx) const { 2180 // Ensure this subregister index is aligned in the super register. 2181 const TargetRegisterClass *MatchRC = 2182 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx); 2183 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? 
MatchRC : nullptr; 2184 } 2185 2186 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { 2187 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST && 2188 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST) 2189 return !ST.hasMFMAInlineLiteralBug(); 2190 2191 return OpType >= AMDGPU::OPERAND_SRC_FIRST && 2192 OpType <= AMDGPU::OPERAND_SRC_LAST; 2193 } 2194 2195 bool SIRegisterInfo::shouldRewriteCopySrc( 2196 const TargetRegisterClass *DefRC, 2197 unsigned DefSubReg, 2198 const TargetRegisterClass *SrcRC, 2199 unsigned SrcSubReg) const { 2200 // We want to prefer the smallest register class possible, so we don't want to 2201 // stop and rewrite on anything that looks like a subregister 2202 // extract. Operations mostly don't care about the super register class, so we 2203 // only want to stop on the most basic of copies between the same register 2204 // class. 2205 // 2206 // e.g. if we have something like 2207 // %0 = ... 2208 // %1 = ... 2209 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2 2210 // %3 = COPY %2, sub0 2211 // 2212 // We want to look through the COPY to find: 2213 // => %3 = COPY %0 2214 2215 // Plain copy. 2216 return getCommonSubClass(DefRC, SrcRC) != nullptr; 2217 } 2218 2219 bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const { 2220 // TODO: 64-bit operands have extending behavior from 32-bit literal. 2221 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST && 2222 OpType <= AMDGPU::OPERAND_REG_IMM_LAST; 2223 } 2224 2225 /// Returns a lowest register that is not used at any point in the function. 2226 /// If all registers are used, then this function will return 2227 /// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return 2228 /// highest unused register. 
2229 MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, 2230 const TargetRegisterClass *RC, 2231 const MachineFunction &MF, 2232 bool ReserveHighestVGPR) const { 2233 if (ReserveHighestVGPR) { 2234 for (MCRegister Reg : reverse(*RC)) 2235 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) 2236 return Reg; 2237 } else { 2238 for (MCRegister Reg : *RC) 2239 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) 2240 return Reg; 2241 } 2242 return MCRegister(); 2243 } 2244 2245 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, 2246 unsigned EltSize) const { 2247 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC); 2248 assert(RegBitWidth >= 32 && RegBitWidth <= 1024); 2249 2250 const unsigned RegDWORDs = RegBitWidth / 32; 2251 const unsigned EltDWORDs = EltSize / 4; 2252 assert(RegSplitParts.size() + 1 >= EltDWORDs); 2253 2254 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1]; 2255 const unsigned NumParts = RegDWORDs / EltDWORDs; 2256 2257 return makeArrayRef(Parts.data(), NumParts); 2258 } 2259 2260 const TargetRegisterClass* 2261 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI, 2262 Register Reg) const { 2263 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg); 2264 } 2265 2266 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, 2267 Register Reg) const { 2268 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg); 2269 // Registers without classes are unaddressable, SGPR-like registers. 2270 return RC && hasVGPRs(RC); 2271 } 2272 2273 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI, 2274 Register Reg) const { 2275 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg); 2276 2277 // Registers without classes are unaddressable, SGPR-like registers. 
2278 return RC && hasAGPRs(RC); 2279 } 2280 2281 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, 2282 const TargetRegisterClass *SrcRC, 2283 unsigned SubReg, 2284 const TargetRegisterClass *DstRC, 2285 unsigned DstSubReg, 2286 const TargetRegisterClass *NewRC, 2287 LiveIntervals &LIS) const { 2288 unsigned SrcSize = getRegSizeInBits(*SrcRC); 2289 unsigned DstSize = getRegSizeInBits(*DstRC); 2290 unsigned NewSize = getRegSizeInBits(*NewRC); 2291 2292 // Do not increase size of registers beyond dword, we would need to allocate 2293 // adjacent registers and constraint regalloc more than needed. 2294 2295 // Always allow dword coalescing. 2296 if (SrcSize <= 32 || DstSize <= 32) 2297 return true; 2298 2299 return NewSize <= DstSize || NewSize <= SrcSize; 2300 } 2301 2302 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, 2303 MachineFunction &MF) const { 2304 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 2305 2306 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(), 2307 MF.getFunction()); 2308 switch (RC->getID()) { 2309 default: 2310 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF); 2311 case AMDGPU::VGPR_32RegClassID: 2312 case AMDGPU::VGPR_LO16RegClassID: 2313 case AMDGPU::VGPR_HI16RegClassID: 2314 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); 2315 case AMDGPU::SGPR_32RegClassID: 2316 case AMDGPU::SGPR_LO16RegClassID: 2317 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); 2318 } 2319 } 2320 2321 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, 2322 unsigned Idx) const { 2323 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 || 2324 Idx == AMDGPU::RegisterPressureSets::AGPR_32) 2325 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass, 2326 const_cast<MachineFunction &>(MF)); 2327 2328 if (Idx == AMDGPU::RegisterPressureSets::SReg_32) 2329 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass, 2330 
const_cast<MachineFunction &>(MF)); 2331 2332 llvm_unreachable("Unexpected register pressure set!"); 2333 } 2334 2335 const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const { 2336 static const int Empty[] = { -1 }; 2337 2338 if (RegPressureIgnoredUnits[RegUnit]) 2339 return Empty; 2340 2341 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit); 2342 } 2343 2344 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const { 2345 // Not a callee saved register. 2346 return AMDGPU::SGPR30_SGPR31; 2347 } 2348 2349 const TargetRegisterClass * 2350 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, 2351 const RegisterBank &RB, 2352 const MachineRegisterInfo &MRI) const { 2353 switch (RB.getID()) { 2354 case AMDGPU::VGPRRegBankID: 2355 return getVGPRClassForBitWidth(std::max(32u, Size)); 2356 case AMDGPU::VCCRegBankID: 2357 assert(Size == 1); 2358 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass 2359 : &AMDGPU::SReg_64_XEXECRegClass; 2360 case AMDGPU::SGPRRegBankID: 2361 return getSGPRClassForBitWidth(std::max(32u, Size)); 2362 case AMDGPU::AGPRRegBankID: 2363 return getAGPRClassForBitWidth(std::max(32u, Size)); 2364 default: 2365 llvm_unreachable("unknown register bank"); 2366 } 2367 } 2368 2369 const TargetRegisterClass * 2370 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO, 2371 const MachineRegisterInfo &MRI) const { 2372 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg()); 2373 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>()) 2374 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI); 2375 2376 const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>(); 2377 return getAllocatableClass(RC); 2378 } 2379 2380 MCRegister SIRegisterInfo::getVCC() const { 2381 return isWave32 ? 
AMDGPU::VCC_LO : AMDGPU::VCC; 2382 } 2383 2384 const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const { 2385 // VGPR tuples have an alignment requirement on gfx90a variants. 2386 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass 2387 : &AMDGPU::VReg_64RegClass; 2388 } 2389 2390 const TargetRegisterClass * 2391 SIRegisterInfo::getRegClass(unsigned RCID) const { 2392 switch ((int)RCID) { 2393 case AMDGPU::SReg_1RegClassID: 2394 return getBoolRC(); 2395 case AMDGPU::SReg_1_XEXECRegClassID: 2396 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass 2397 : &AMDGPU::SReg_64_XEXECRegClass; 2398 case -1: 2399 return nullptr; 2400 default: 2401 return AMDGPUGenRegisterInfo::getRegClass(RCID); 2402 } 2403 } 2404 2405 // Find reaching register definition 2406 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg, 2407 MachineInstr &Use, 2408 MachineRegisterInfo &MRI, 2409 LiveIntervals *LIS) const { 2410 auto &MDT = LIS->getAnalysis<MachineDominatorTree>(); 2411 SlotIndex UseIdx = LIS->getInstructionIndex(Use); 2412 SlotIndex DefIdx; 2413 2414 if (Reg.isVirtual()) { 2415 if (!LIS->hasInterval(Reg)) 2416 return nullptr; 2417 LiveInterval &LI = LIS->getInterval(Reg); 2418 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg) 2419 : MRI.getMaxLaneMaskForVReg(Reg); 2420 VNInfo *V = nullptr; 2421 if (LI.hasSubRanges()) { 2422 for (auto &S : LI.subranges()) { 2423 if ((S.LaneMask & SubLanes) == SubLanes) { 2424 V = S.getVNInfoAt(UseIdx); 2425 break; 2426 } 2427 } 2428 } else { 2429 V = LI.getVNInfoAt(UseIdx); 2430 } 2431 if (!V) 2432 return nullptr; 2433 DefIdx = V->def; 2434 } else { 2435 // Find last def. 
2436 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid(); 2437 ++Units) { 2438 LiveRange &LR = LIS->getRegUnit(*Units); 2439 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) { 2440 if (!DefIdx.isValid() || 2441 MDT.dominates(LIS->getInstructionFromIndex(DefIdx), 2442 LIS->getInstructionFromIndex(V->def))) 2443 DefIdx = V->def; 2444 } else { 2445 return nullptr; 2446 } 2447 } 2448 } 2449 2450 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx); 2451 2452 if (!Def || !MDT.dominates(Def, &Use)) 2453 return nullptr; 2454 2455 assert(Def->modifiesRegister(Reg, this)); 2456 2457 return Def; 2458 } 2459 2460 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const { 2461 assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32); 2462 2463 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass, 2464 AMDGPU::SReg_32RegClass, 2465 AMDGPU::AGPR_32RegClass } ) { 2466 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC)) 2467 return Super; 2468 } 2469 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16, 2470 &AMDGPU::VGPR_32RegClass)) { 2471 return Super; 2472 } 2473 2474 return AMDGPU::NoRegister; 2475 } 2476 2477 bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const { 2478 if (!ST.needsAlignedVGPRs()) 2479 return true; 2480 2481 if (hasVGPRs(&RC)) 2482 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC))); 2483 if (hasAGPRs(&RC)) 2484 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC))); 2485 2486 return true; 2487 } 2488 2489 bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const { 2490 switch (PhysReg) { 2491 case AMDGPU::SGPR_NULL: 2492 case AMDGPU::SRC_SHARED_BASE: 2493 case AMDGPU::SRC_PRIVATE_BASE: 2494 case AMDGPU::SRC_SHARED_LIMIT: 2495 case AMDGPU::SRC_PRIVATE_LIMIT: 2496 return true; 2497 default: 2498 return false; 2499 } 2500 } 2501 2502 ArrayRef<MCPhysReg> 2503 SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const { 2504 return 
makeArrayRef(AMDGPU::SGPR_128RegClass.begin(), 2505 ST.getMaxNumSGPRs(MF) / 4); 2506 } 2507 2508 ArrayRef<MCPhysReg> 2509 SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const { 2510 return makeArrayRef(AMDGPU::SGPR_64RegClass.begin(), 2511 ST.getMaxNumSGPRs(MF) / 2); 2512 } 2513 2514 ArrayRef<MCPhysReg> 2515 SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const { 2516 return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF)); 2517 } 2518