1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 16 17 #include "AMDGPUMachineFunction.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "SIRegisterInfo.h" 20 #include "llvm/CodeGen/PseudoSourceValue.h" 21 #include "llvm/MC/MCRegisterInfo.h" 22 #include "llvm/Support/ErrorHandling.h" 23 #include <array> 24 #include <cassert> 25 #include <map> 26 #include <utility> 27 28 namespace llvm { 29 30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { 31 public: 32 explicit AMDGPUImagePseudoSourceValue() : 33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 34 35 bool isConstant(const MachineFrameInfo *) const override { 36 // This should probably be true for most images, but we will start by being 37 // conservative. 38 return false; 39 } 40 41 bool isAliased(const MachineFrameInfo *) const override { 42 // FIXME: If we ever change image intrinsics to accept fat pointers, then 43 // this could be true for some cases. 44 return false; 45 } 46 47 bool mayAlias(const MachineFrameInfo*) const override { 48 // FIXME: If we ever change image intrinsics to accept fat pointers, then 49 // this could be true for some cases. 50 return false; 51 } 52 }; 53 54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue { 55 public: 56 explicit AMDGPUBufferPseudoSourceValue() : 57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { } 58 59 bool isConstant(const MachineFrameInfo *) const override { 60 // This should probably be true for most images, but we will start by being 61 // conservative. 62 return false; 63 } 64 65 bool isAliased(const MachineFrameInfo *) const override { 66 // FIXME: If we ever change image intrinsics to accept fat pointers, then 67 // this could be true for some cases. 68 return false; 69 } 70 71 bool mayAlias(const MachineFrameInfo*) const override { 72 // FIXME: If we ever change image intrinsics to accept fat pointers, then 73 // this could be true for some cases. 74 return false; 75 } 76 }; 77 78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which 79 /// tells the hardware which interpolation parameters to load. 80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction { 81 // FIXME: This should be removed and getPreloadedValue moved here. 82 friend class SIRegisterInfo; 83 84 unsigned TIDReg; 85 86 // Registers that may be reserved for spilling purposes. These may be the same 87 // as the input registers. 88 unsigned ScratchRSrcReg; 89 unsigned ScratchWaveOffsetReg; 90 91 // This is the current function's incremented size from the kernel's scratch 92 // wave offset register. For an entry function, this is exactly the same as 93 // the ScratchWaveOffsetReg. 94 unsigned FrameOffsetReg; 95 96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. 97 unsigned StackPtrOffsetReg; 98 99 // Input registers for non-HSA ABI 100 unsigned ImplicitBufferPtrUserSGPR; 101 102 // Input registers setup for the HSA ABI. 103 // User SGPRs in allocation order. 104 unsigned PrivateSegmentBufferUserSGPR; 105 unsigned DispatchPtrUserSGPR; 106 unsigned QueuePtrUserSGPR; 107 unsigned KernargSegmentPtrUserSGPR; 108 unsigned DispatchIDUserSGPR; 109 unsigned FlatScratchInitUserSGPR; 110 unsigned PrivateSegmentSizeUserSGPR; 111 unsigned GridWorkGroupCountXUserSGPR; 112 unsigned GridWorkGroupCountYUserSGPR; 113 unsigned GridWorkGroupCountZUserSGPR; 114 115 // System SGPRs in allocation order. 116 unsigned WorkGroupIDXSystemSGPR; 117 unsigned WorkGroupIDYSystemSGPR; 118 unsigned WorkGroupIDZSystemSGPR; 119 unsigned WorkGroupInfoSystemSGPR; 120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR; 121 122 // Graphics info. 123 unsigned PSInputAddr; 124 unsigned PSInputEnable; 125 126 bool ReturnsVoid; 127 128 // A pair of default/requested minimum/maximum flat work group sizes. 129 // Minimum - first, maximum - second. 130 std::pair<unsigned, unsigned> FlatWorkGroupSizes; 131 132 // A pair of default/requested minimum/maximum number of waves per execution 133 // unit. Minimum - first, maximum - second. 134 std::pair<unsigned, unsigned> WavesPerEU; 135 136 // Stack object indices for work group IDs. 137 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices; 138 // Stack object indices for work item IDs. 139 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices; 140 141 AMDGPUBufferPseudoSourceValue BufferPSV; 142 AMDGPUImagePseudoSourceValue ImagePSV; 143 144 private: 145 unsigned LDSWaveSpillSize; 146 unsigned ScratchOffsetReg; 147 unsigned NumUserSGPRs; 148 unsigned NumSystemSGPRs; 149 150 bool HasSpilledSGPRs; 151 bool HasSpilledVGPRs; 152 bool HasNonSpillStackObjects; 153 154 unsigned NumSpilledSGPRs; 155 unsigned NumSpilledVGPRs; 156 157 // Feature bits required for inputs passed in user SGPRs. 158 bool PrivateSegmentBuffer : 1; 159 bool DispatchPtr : 1; 160 bool QueuePtr : 1; 161 bool KernargSegmentPtr : 1; 162 bool DispatchID : 1; 163 bool FlatScratchInit : 1; 164 bool GridWorkgroupCountX : 1; 165 bool GridWorkgroupCountY : 1; 166 bool GridWorkgroupCountZ : 1; 167 168 // Feature bits required for inputs passed in system SGPRs. 169 bool WorkGroupIDX : 1; // Always initialized. 170 bool WorkGroupIDY : 1; 171 bool WorkGroupIDZ : 1; 172 bool WorkGroupInfo : 1; 173 bool PrivateSegmentWaveByteOffset : 1; 174 175 bool WorkItemIDX : 1; // Always initialized. 176 bool WorkItemIDY : 1; 177 bool WorkItemIDZ : 1; 178 179 // Private memory buffer 180 // Compute directly in sgpr[0:1] 181 // Other shaders indirect 64-bits at sgpr[0:1] 182 bool ImplicitBufferPtr : 1; 183 184 MCPhysReg getNextUserSGPR() const { 185 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 186 return AMDGPU::SGPR0 + NumUserSGPRs; 187 } 188 189 MCPhysReg getNextSystemSGPR() const { 190 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 191 } 192 193 public: 194 struct SpilledReg { 195 unsigned VGPR = AMDGPU::NoRegister; 196 int Lane = -1; 197 198 SpilledReg() = default; 199 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } 200 201 bool hasLane() { return Lane != -1;} 202 bool hasReg() { return VGPR != AMDGPU::NoRegister;} 203 }; 204 205 private: 206 // SGPR->VGPR spilling support. 207 typedef std::pair<unsigned, unsigned> SpillRegMask; 208 209 // Track VGPR + wave index for each subregister of the SGPR spilled to 210 // frameindex key. 211 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; 212 unsigned NumVGPRSpillLanes = 0; 213 SmallVector<unsigned, 2> SpillVGPRs; 214 215 public: 216 217 SIMachineFunctionInfo(const MachineFunction &MF); 218 219 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const { 220 auto I = SGPRToVGPRSpills.find(FrameIndex); 221 return (I == SGPRToVGPRSpills.end()) ? 222 ArrayRef<SpilledReg>() : makeArrayRef(I->second); 223 } 224 225 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); 226 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); 227 228 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; 229 unsigned getTIDReg() const { return TIDReg; }; 230 void setTIDReg(unsigned Reg) { TIDReg = Reg; } 231 232 // Add user SGPRs. 233 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); 234 unsigned addDispatchPtr(const SIRegisterInfo &TRI); 235 unsigned addQueuePtr(const SIRegisterInfo &TRI); 236 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); 237 unsigned addDispatchID(const SIRegisterInfo &TRI); 238 unsigned addFlatScratchInit(const SIRegisterInfo &TRI); 239 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI); 240 241 // Add system SGPRs. 242 unsigned addWorkGroupIDX() { 243 WorkGroupIDXSystemSGPR = getNextSystemSGPR(); 244 NumSystemSGPRs += 1; 245 return WorkGroupIDXSystemSGPR; 246 } 247 248 unsigned addWorkGroupIDY() { 249 WorkGroupIDYSystemSGPR = getNextSystemSGPR(); 250 NumSystemSGPRs += 1; 251 return WorkGroupIDYSystemSGPR; 252 } 253 254 unsigned addWorkGroupIDZ() { 255 WorkGroupIDZSystemSGPR = getNextSystemSGPR(); 256 NumSystemSGPRs += 1; 257 return WorkGroupIDZSystemSGPR; 258 } 259 260 unsigned addWorkGroupInfo() { 261 WorkGroupInfoSystemSGPR = getNextSystemSGPR(); 262 NumSystemSGPRs += 1; 263 return WorkGroupInfoSystemSGPR; 264 } 265 266 unsigned addPrivateSegmentWaveByteOffset() { 267 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); 268 NumSystemSGPRs += 1; 269 return PrivateSegmentWaveByteOffsetSystemSGPR; 270 } 271 272 void setPrivateSegmentWaveByteOffset(unsigned Reg) { 273 PrivateSegmentWaveByteOffsetSystemSGPR = Reg; 274 } 275 276 bool hasPrivateSegmentBuffer() const { 277 return PrivateSegmentBuffer; 278 } 279 280 bool hasDispatchPtr() const { 281 return DispatchPtr; 282 } 283 284 bool hasQueuePtr() const { 285 return QueuePtr; 286 } 287 288 bool hasKernargSegmentPtr() const { 289 return KernargSegmentPtr; 290 } 291 292 bool hasDispatchID() const { 293 return DispatchID; 294 } 295 296 bool hasFlatScratchInit() const { 297 return FlatScratchInit; 298 } 299 300 bool hasGridWorkgroupCountX() const { 301 return GridWorkgroupCountX; 302 } 303 304 bool hasGridWorkgroupCountY() const { 305 return GridWorkgroupCountY; 306 } 307 308 bool hasGridWorkgroupCountZ() const { 309 return GridWorkgroupCountZ; 310 } 311 312 bool hasWorkGroupIDX() const { 313 return WorkGroupIDX; 314 } 315 316 bool hasWorkGroupIDY() const { 317 return WorkGroupIDY; 318 } 319 320 bool hasWorkGroupIDZ() const { 321 return WorkGroupIDZ; 322 } 323 324 bool hasWorkGroupInfo() const { 325 return WorkGroupInfo; 326 } 327 328 bool hasPrivateSegmentWaveByteOffset() const { 329 return PrivateSegmentWaveByteOffset; 330 } 331 332 bool hasWorkItemIDX() const { 333 return WorkItemIDX; 334 } 335 336 bool hasWorkItemIDY() const { 337 return WorkItemIDY; 338 } 339 340 bool hasWorkItemIDZ() const { 341 return WorkItemIDZ; 342 } 343 344 bool hasImplicitBufferPtr() const { 345 return ImplicitBufferPtr; 346 } 347 348 unsigned getNumUserSGPRs() const { 349 return NumUserSGPRs; 350 } 351 352 unsigned getNumPreloadedSGPRs() const { 353 return NumUserSGPRs + NumSystemSGPRs; 354 } 355 356 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { 357 return PrivateSegmentWaveByteOffsetSystemSGPR; 358 } 359 360 /// \brief Returns the physical register reserved for use as the resource 361 /// descriptor for scratch accesses. 362 unsigned getScratchRSrcReg() const { 363 return ScratchRSrcReg; 364 } 365 366 void setScratchRSrcReg(unsigned Reg) { 367 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 368 ScratchRSrcReg = Reg; 369 } 370 371 unsigned getScratchWaveOffsetReg() const { 372 return ScratchWaveOffsetReg; 373 } 374 375 unsigned getFrameOffsetReg() const { 376 return FrameOffsetReg; 377 } 378 379 void setStackPtrOffsetReg(unsigned Reg) { 380 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 381 StackPtrOffsetReg = Reg; 382 } 383 384 unsigned getStackPtrOffsetReg() const { 385 return StackPtrOffsetReg; 386 } 387 388 void setScratchWaveOffsetReg(unsigned Reg) { 389 assert(Reg != AMDGPU::NoRegister && "Should never be unset"); 390 ScratchWaveOffsetReg = Reg; 391 if (isEntryFunction()) 392 FrameOffsetReg = ScratchWaveOffsetReg; 393 } 394 395 unsigned getQueuePtrUserSGPR() const { 396 return QueuePtrUserSGPR; 397 } 398 399 unsigned getImplicitBufferPtrUserSGPR() const { 400 return ImplicitBufferPtrUserSGPR; 401 } 402 403 bool hasSpilledSGPRs() const { 404 return HasSpilledSGPRs; 405 } 406 407 void setHasSpilledSGPRs(bool Spill = true) { 408 HasSpilledSGPRs = Spill; 409 } 410 411 bool hasSpilledVGPRs() const { 412 return HasSpilledVGPRs; 413 } 414 415 void setHasSpilledVGPRs(bool Spill = true) { 416 HasSpilledVGPRs = Spill; 417 } 418 419 bool hasNonSpillStackObjects() const { 420 return HasNonSpillStackObjects; 421 } 422 423 void setHasNonSpillStackObjects(bool StackObject = true) { 424 HasNonSpillStackObjects = StackObject; 425 } 426 427 unsigned getNumSpilledSGPRs() const { 428 return NumSpilledSGPRs; 429 } 430 431 unsigned getNumSpilledVGPRs() const { 432 return NumSpilledVGPRs; 433 } 434 435 void addToSpilledSGPRs(unsigned num) { 436 NumSpilledSGPRs += num; 437 } 438 439 void addToSpilledVGPRs(unsigned num) { 440 NumSpilledVGPRs += num; 441 } 442 443 unsigned getPSInputAddr() const { 444 return PSInputAddr; 445 } 446 447 unsigned getPSInputEnable() const { 448 return PSInputEnable; 449 } 450 451 bool isPSInputAllocated(unsigned Index) const { 452 return PSInputAddr & (1 << Index); 453 } 454 455 void markPSInputAllocated(unsigned Index) { 456 PSInputAddr |= 1 << Index; 457 } 458 459 void markPSInputEnabled(unsigned Index) { 460 PSInputEnable |= 1 << Index; 461 } 462 463 bool returnsVoid() const { 464 return ReturnsVoid; 465 } 466 467 void setIfReturnsVoid(bool Value) { 468 ReturnsVoid = Value; 469 } 470 471 /// \returns A pair of default/requested minimum/maximum flat work group sizes 472 /// for this function. 473 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const { 474 return FlatWorkGroupSizes; 475 } 476 477 /// \returns Default/requested minimum flat work group size for this function. 478 unsigned getMinFlatWorkGroupSize() const { 479 return FlatWorkGroupSizes.first; 480 } 481 482 /// \returns Default/requested maximum flat work group size for this function. 483 unsigned getMaxFlatWorkGroupSize() const { 484 return FlatWorkGroupSizes.second; 485 } 486 487 /// \returns A pair of default/requested minimum/maximum number of waves per 488 /// execution unit. 489 std::pair<unsigned, unsigned> getWavesPerEU() const { 490 return WavesPerEU; 491 } 492 493 /// \returns Default/requested minimum number of waves per execution unit. 494 unsigned getMinWavesPerEU() const { 495 return WavesPerEU.first; 496 } 497 498 /// \returns Default/requested maximum number of waves per execution unit. 499 unsigned getMaxWavesPerEU() const { 500 return WavesPerEU.second; 501 } 502 503 /// \returns Stack object index for \p Dim's work group ID. 504 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const { 505 assert(Dim < 3); 506 return DebuggerWorkGroupIDStackObjectIndices[Dim]; 507 } 508 509 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx. 510 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 511 assert(Dim < 3); 512 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx; 513 } 514 515 /// \returns Stack object index for \p Dim's work item ID. 516 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const { 517 assert(Dim < 3); 518 return DebuggerWorkItemIDStackObjectIndices[Dim]; 519 } 520 521 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx. 522 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) { 523 assert(Dim < 3); 524 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx; 525 } 526 527 /// \returns SGPR used for \p Dim's work group ID. 528 unsigned getWorkGroupIDSGPR(unsigned Dim) const { 529 switch (Dim) { 530 case 0: 531 assert(hasWorkGroupIDX()); 532 return WorkGroupIDXSystemSGPR; 533 case 1: 534 assert(hasWorkGroupIDY()); 535 return WorkGroupIDYSystemSGPR; 536 case 2: 537 assert(hasWorkGroupIDZ()); 538 return WorkGroupIDZSystemSGPR; 539 } 540 llvm_unreachable("unexpected dimension"); 541 } 542 543 /// \returns VGPR used for \p Dim' work item ID. 544 unsigned getWorkItemIDVGPR(unsigned Dim) const { 545 switch (Dim) { 546 case 0: 547 assert(hasWorkItemIDX()); 548 return AMDGPU::VGPR0; 549 case 1: 550 assert(hasWorkItemIDY()); 551 return AMDGPU::VGPR1; 552 case 2: 553 assert(hasWorkItemIDZ()); 554 return AMDGPU::VGPR2; 555 } 556 llvm_unreachable("unexpected dimension"); 557 } 558 559 unsigned getLDSWaveSpillSize() const { 560 return LDSWaveSpillSize; 561 } 562 563 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { 564 return &BufferPSV; 565 } 566 567 const AMDGPUImagePseudoSourceValue *getImagePSV() const { 568 return &ImagePSV; 569 } 570 }; 571 572 } // end namespace llvm 573 574 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H 575