//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Declarations of AMDGPU helper queries and small value types: HSA ABI and
/// code-object version checks, per-subtarget ISA resource limits, opcode table
/// lookups, s_waitcnt / hwreg / sendmsg / MTBUF-format encoding helpers,
/// calling-convention and subtarget predicates, and MODE register defaults.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

// Declared at global scope; only named by reference in this header.
struct amd_kernel_code_t;

namespace llvm {

// Forward declarations of types that the prototypes below mention.
struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
///
/// NOTE(review): the Optional return suggests some subtargets have no HSA ABI
/// version at all; the condition is not visible here - confirm in the
/// definition.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); 56 57 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 58 unsigned getHostcallImplicitArgPosition(); 59 60 /// \returns The offset of the heap ptr argument from implicitarg_ptr 61 unsigned getHeapPtrImplicitArgPosition(); 62 63 /// \returns The offset of the queue ptr argument from implicitarg_ptr 64 unsigned getQueuePtrImplicitArgPosition(); 65 66 /// \returns Code object version. 67 unsigned getAmdhsaCodeObjectVersion(); 68 69 struct GcnBufferFormatInfo { 70 unsigned Format; 71 unsigned BitsPerComp; 72 unsigned NumComponents; 73 unsigned NumFormat; 74 unsigned DataFormat; 75 }; 76 77 #define GET_MIMGBaseOpcode_DECL 78 #define GET_MIMGDim_DECL 79 #define GET_MIMGEncoding_DECL 80 #define GET_MIMGLZMapping_DECL 81 #define GET_MIMGMIPMapping_DECL 82 #define GET_MIMGBiASMapping_DECL 83 #include "AMDGPUGenSearchableTables.inc" 84 85 namespace IsaInfo { 86 87 enum { 88 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 89 // doesn't spill SGPRs as much as when 80 is set. 90 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 91 TRAP_NUM_SGPRS = 16 92 }; 93 94 enum class TargetIDSetting { 95 Unsupported, 96 Any, 97 Off, 98 On 99 }; 100 101 class AMDGPUTargetID { 102 private: 103 const MCSubtargetInfo &STI; 104 TargetIDSetting XnackSetting; 105 TargetIDSetting SramEccSetting; 106 107 public: 108 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 109 ~AMDGPUTargetID() = default; 110 111 /// \return True if the current xnack setting is not "Unsupported". 112 bool isXnackSupported() const { 113 return XnackSetting != TargetIDSetting::Unsupported; 114 } 115 116 /// \returns True if the current xnack setting is "On" or "Any". 117 bool isXnackOnOrAny() const { 118 return XnackSetting == TargetIDSetting::On || 119 XnackSetting == TargetIDSetting::Any; 120 } 121 122 /// \returns True if current xnack setting is "On" or "Off", 123 /// false otherwise. 
124 bool isXnackOnOrOff() const { 125 return getXnackSetting() == TargetIDSetting::On || 126 getXnackSetting() == TargetIDSetting::Off; 127 } 128 129 /// \returns The current xnack TargetIDSetting, possible options are 130 /// "Unsupported", "Any", "Off", and "On". 131 TargetIDSetting getXnackSetting() const { 132 return XnackSetting; 133 } 134 135 /// Sets xnack setting to \p NewXnackSetting. 136 void setXnackSetting(TargetIDSetting NewXnackSetting) { 137 XnackSetting = NewXnackSetting; 138 } 139 140 /// \return True if the current sramecc setting is not "Unsupported". 141 bool isSramEccSupported() const { 142 return SramEccSetting != TargetIDSetting::Unsupported; 143 } 144 145 /// \returns True if the current sramecc setting is "On" or "Any". 146 bool isSramEccOnOrAny() const { 147 return SramEccSetting == TargetIDSetting::On || 148 SramEccSetting == TargetIDSetting::Any; 149 } 150 151 /// \returns True if current sramecc setting is "On" or "Off", 152 /// false otherwise. 153 bool isSramEccOnOrOff() const { 154 return getSramEccSetting() == TargetIDSetting::On || 155 getSramEccSetting() == TargetIDSetting::Off; 156 } 157 158 /// \returns The current sramecc TargetIDSetting, possible options are 159 /// "Unsupported", "Any", "Off", and "On". 160 TargetIDSetting getSramEccSetting() const { 161 return SramEccSetting; 162 } 163 164 /// Sets sramecc setting to \p NewSramEccSetting. 165 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 166 SramEccSetting = NewSramEccSetting; 167 } 168 169 void setTargetIDFromFeaturesString(StringRef FS); 170 void setTargetIDFromTargetIDStream(StringRef TargetID); 171 172 /// \returns String representation of an object. 173 std::string toString() const; 174 }; 175 176 /// \returns Wavefront size for given subtarget \p STI. 177 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 178 179 /// \returns Local memory size in bytes for given subtarget \p STI. 
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

// --- Execution-unit, wave and work-group limits ---------------------------

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

// --- SGPR budget -----------------------------------------------------------

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
///
/// NOTE(review): the effect of \p Addressable is not described by this
/// declaration; presumably it limits the result to the addressable SGPR
/// count - confirm against the definition.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

// --- VGPR budget -----------------------------------------------------------

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
260 /// 261 /// For subtargets which support it, \p EnableWavefrontSize32 should match 262 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 263 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, 264 Optional<bool> EnableWavefrontSize32 = None); 265 266 /// \returns Total number of VGPRs for given subtarget \p STI. 267 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI); 268 269 /// \returns Addressable number of VGPRs for given subtarget \p STI. 270 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI); 271 272 /// \returns Minimum number of VGPRs that meets given number of waves per 273 /// execution unit requirement for given subtarget \p STI. 274 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 275 276 /// \returns Maximum number of VGPRs that meets given number of waves per 277 /// execution unit requirement for given subtarget \p STI. 278 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU); 279 280 /// \returns Number of VGPR blocks needed for given subtarget \p STI when 281 /// \p NumVGPRs are used. 282 /// 283 /// For subtargets which support it, \p EnableWavefrontSize32 should match the 284 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field. 
285 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 286 Optional<bool> EnableWavefrontSize32 = None); 287 288 } // end namespace IsaInfo 289 290 LLVM_READONLY 291 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 292 293 LLVM_READONLY 294 int getSOPPWithRelaxation(uint16_t Opcode); 295 296 struct MIMGBaseOpcodeInfo { 297 MIMGBaseOpcode BaseOpcode; 298 bool Store; 299 bool Atomic; 300 bool AtomicX2; 301 bool Sampler; 302 bool Gather4; 303 304 uint8_t NumExtraArgs; 305 bool Gradients; 306 bool G16; 307 bool Coordinates; 308 bool LodOrClampOrMip; 309 bool HasD16; 310 bool MSAA; 311 bool BVH; 312 }; 313 314 LLVM_READONLY 315 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); 316 317 LLVM_READONLY 318 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 319 320 struct MIMGDimInfo { 321 MIMGDim Dim; 322 uint8_t NumCoords; 323 uint8_t NumGradients; 324 bool MSAA; 325 bool DA; 326 uint8_t Encoding; 327 const char *AsmSuffix; 328 }; 329 330 LLVM_READONLY 331 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 332 333 LLVM_READONLY 334 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 335 336 LLVM_READONLY 337 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 338 339 struct MIMGLZMappingInfo { 340 MIMGBaseOpcode L; 341 MIMGBaseOpcode LZ; 342 }; 343 344 struct MIMGMIPMappingInfo { 345 MIMGBaseOpcode MIP; 346 MIMGBaseOpcode NONMIP; 347 }; 348 349 struct MIMGBiasMappingInfo { 350 MIMGBaseOpcode Bias; 351 MIMGBaseOpcode NoBias; 352 }; 353 354 struct MIMGOffsetMappingInfo { 355 MIMGBaseOpcode Offset; 356 MIMGBaseOpcode NoOffset; 357 }; 358 359 struct MIMGG16MappingInfo { 360 MIMGBaseOpcode G; 361 MIMGBaseOpcode G16; 362 }; 363 364 LLVM_READONLY 365 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 366 367 LLVM_READONLY 368 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); 369 370 LLVM_READONLY 371 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); 372 
LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

// --- MIMG opcode selection -------------------------------------------------
// NOTE(review): the "not found" behaviour of the int-returning lookups in
// this section is not visible here (a negative value seems likely) - confirm
// in the definitions before relying on it.

/// Finds the MIMG instruction opcode for the given base opcode, encoding and
/// data/address dword counts.
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

/// Ties one MIMG instruction opcode to its base opcode, encoding and
/// data/address dword counts.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

// --- MTBUF opcode properties ----------------------------------------------

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

// --- MUBUF opcode properties ----------------------------------------------

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

// --- SMEM / VOP opcode properties ------------------------------------------

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

// --- Buffer formats --------------------------------------------------------

/// Finds the buffer format record matching the given per-component bit
/// width, component count and numeric format for subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
/// Finds the buffer format record for the format value \p Format on
/// subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

// --- Kernel header / descriptor defaults ------------------------------------

/// Fills \p Header with default values for subtarget \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

/// \returns A kernel descriptor holding the defaults for subtarget \p STI.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

// Segment classification of a global value.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  // One field per counter. The default ~0u is the "no wait requested" value
  // that the hasWait*() predicates test against.
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait for zero on every counter; VsCnt is only zeroed when
  /// \p HasVscnt is set and is left at ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    const unsigned Vs = HasVscnt ? 0u : ~0u;
    return Waitcnt(0, 0, 0, Vs);
  }

  /// \returns A wait for zero on VmCnt, ExpCnt and LgkmCnt with VsCnt left
  /// at ~0u.
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns True if at least one of the four counters differs from ~0u.
  bool hasWait() const {
    return hasWaitExceptVsCnt() || hasWaitVsCnt();
  }

  /// \returns True if VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const {
    return !(VsCnt == ~0u);
  }

  /// \returns True if every counter of this wait is less than or equal to the
  /// matching counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    const bool AnyCounterLarger =
        VmCnt > Other.VmCnt || ExpCnt > Other.ExpCnt ||
        LgkmCnt > Other.LgkmCnt || VsCnt > Other.VsCnt;
    return !AnyCounterLarger;
  }

  /// \returns A wait whose counters are the field-wise minimum of this wait
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged;
    Merged.VmCnt = std::min(VmCnt, Other.VmCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Merged.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Merged;
  }
};

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// Struct-returning form of the decoder above: takes the encoded immediate
/// \p Encoded and returns the counters as a Waitcnt.
///
/// NOTE(review): how the VsCnt member of the result is set is not visible in
/// this header - confirm in the definition.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// Struct-taking form of the encoder above: encodes the counters held in
/// \p Decoded for isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

/// Name lookup, validation and encode/decode helpers for hardware-register
/// operands (id, offset, width).
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

// NOTE(review): marked LLVM_READNONE although it takes STI by reference; if
// the definition inspects STI, LLVM_READONLY would be the accurate
// annotation - confirm.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Splits \p Val into \p Id, \p Offset and \p Width (output parameters).
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

/// Name/id helpers for export targets.
namespace Exp {

/// Output parameters: \p Name and \p Index.
/// NOTE(review): the meaning of the bool result is not visible here;
/// presumably it reports whether \p Id is a known target - confirm.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

// NOTE(review): LLVM_READNONE with a by-reference STI parameter; see the
// remark on Hwreg::getHwreg.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

/// Helpers for MTBUF format operands: split dfmt/nfmt values and their names.
namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Splits \p Format into \p Dfmt and \p Nfmt (output parameters).
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI); 662 663 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI); 664 665 int64_t getUnifiedFormat(const StringRef Name); 666 667 StringRef getUnifiedFormatName(unsigned Id); 668 669 bool isValidUnifiedFormat(unsigned Val); 670 671 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt); 672 673 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI); 674 675 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI); 676 677 } // namespace MTBUFFormat 678 679 namespace SendMsg { 680 681 LLVM_READONLY 682 int64_t getMsgId(const StringRef Name); 683 684 LLVM_READONLY 685 int64_t getMsgOpId(int64_t MsgId, const StringRef Name); 686 687 LLVM_READNONE 688 StringRef getMsgName(int64_t MsgId); 689 690 LLVM_READNONE 691 StringRef getMsgOpName(int64_t MsgId, int64_t OpId); 692 693 LLVM_READNONE 694 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true); 695 696 LLVM_READNONE 697 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, 698 bool Strict = true); 699 700 LLVM_READNONE 701 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, 702 const MCSubtargetInfo &STI, bool Strict = true); 703 704 LLVM_READNONE 705 bool msgRequiresOp(int64_t MsgId); 706 707 LLVM_READNONE 708 bool msgSupportsStream(int64_t MsgId, int64_t OpId); 709 710 void decodeMsg(unsigned Val, 711 uint16_t &MsgId, 712 uint16_t &OpId, 713 uint16_t &StreamId); 714 715 LLVM_READNONE 716 uint64_t encodeMsg(uint64_t MsgId, 717 uint64_t OpId, 718 uint64_t StreamId); 719 720 } // namespace SendMsg 721 722 723 unsigned getInitialPSInputAddr(const Function &F); 724 725 bool getHasColorExport(const Function &F); 726 727 bool getHasDepthExport(const Function &F); 728 729 LLVM_READNONE 730 bool isShader(CallingConv::ID CC); 731 732 LLVM_READNONE 733 bool isGraphics(CallingConv::ID CC); 734 735 LLVM_READNONE 736 bool isCompute(CallingConv::ID CC); 737 738 LLVM_READNONE 739 
bool isEntryFunctionCC(CallingConv::ID CC); 740 741 // These functions are considered entrypoints into the current module, i.e. they 742 // are allowed to be called from outside the current module. This is different 743 // from isEntryFunctionCC, which is only true for functions that are entered by 744 // the hardware. Module entry points include all entry functions but also 745 // include functions that can be called from other functions inside or outside 746 // the current module. Module entry functions are allowed to allocate LDS. 747 LLVM_READNONE 748 bool isModuleEntryFunctionCC(CallingConv::ID CC); 749 750 bool isKernelCC(const Function *Func); 751 752 // FIXME: Remove this when calling conventions cleaned up 753 LLVM_READNONE 754 inline bool isKernel(CallingConv::ID CC) { 755 switch (CC) { 756 case CallingConv::AMDGPU_KERNEL: 757 case CallingConv::SPIR_KERNEL: 758 return true; 759 default: 760 return false; 761 } 762 } 763 764 bool hasXNACK(const MCSubtargetInfo &STI); 765 bool hasSRAMECC(const MCSubtargetInfo &STI); 766 bool hasMIMG_R128(const MCSubtargetInfo &STI); 767 bool hasGFX10A16(const MCSubtargetInfo &STI); 768 bool hasG16(const MCSubtargetInfo &STI); 769 bool hasPackedD16(const MCSubtargetInfo &STI); 770 771 bool isSI(const MCSubtargetInfo &STI); 772 bool isCI(const MCSubtargetInfo &STI); 773 bool isVI(const MCSubtargetInfo &STI); 774 bool isGFX9(const MCSubtargetInfo &STI); 775 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 776 bool isGFX9Plus(const MCSubtargetInfo &STI); 777 bool isGFX10(const MCSubtargetInfo &STI); 778 bool isGFX10Plus(const MCSubtargetInfo &STI); 779 bool isGCN3Encoding(const MCSubtargetInfo &STI); 780 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 781 bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 782 bool hasGFX10_3Insts(const MCSubtargetInfo &STI); 783 bool isGFX90A(const MCSubtargetInfo &STI); 784 bool isGFX940(const MCSubtargetInfo &STI); 785 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 786 bool 
hasMAIInsts(const MCSubtargetInfo &STI); 787 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 788 789 /// Is Reg - scalar register 790 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 791 792 /// If \p Reg is a pseudo reg, return the correct hardware register given 793 /// \p STI otherwise return \p Reg. 794 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 795 796 /// Convert hardware register \p Reg to a pseudo register 797 LLVM_READNONE 798 unsigned mc2PseudoReg(unsigned Reg); 799 800 /// Can this operand also contain immediate values? 801 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 802 803 /// Is this floating-point operand? 804 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 805 806 /// Does this operand support only inlinable literals? 807 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 808 809 /// Get the size in bits of a register from the register class \p RC. 810 unsigned getRegBitWidth(unsigned RCID); 811 812 /// Get the size in bits of a register from the register class \p RC. 
813 unsigned getRegBitWidth(const MCRegisterClass &RC); 814 815 /// Get size of register operand 816 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 817 unsigned OpNo); 818 819 LLVM_READNONE 820 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 821 switch (OpInfo.OperandType) { 822 case AMDGPU::OPERAND_REG_IMM_INT32: 823 case AMDGPU::OPERAND_REG_IMM_FP32: 824 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 825 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 826 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 827 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 828 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 829 case AMDGPU::OPERAND_REG_IMM_V2INT32: 830 case AMDGPU::OPERAND_REG_IMM_V2FP32: 831 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 832 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 833 case AMDGPU::OPERAND_KIMM32: 834 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 835 return 4; 836 837 case AMDGPU::OPERAND_REG_IMM_INT64: 838 case AMDGPU::OPERAND_REG_IMM_FP64: 839 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 840 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 841 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 842 return 8; 843 844 case AMDGPU::OPERAND_REG_IMM_INT16: 845 case AMDGPU::OPERAND_REG_IMM_FP16: 846 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 847 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 848 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 849 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 850 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 851 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 852 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 853 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 854 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 855 case AMDGPU::OPERAND_REG_IMM_V2INT16: 856 case AMDGPU::OPERAND_REG_IMM_V2FP16: 857 return 2; 858 859 default: 860 llvm_unreachable("unhandled operand type"); 861 } 862 } 863 864 LLVM_READNONE 865 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 866 return getOperandSize(Desc.OpInfo[OpNo]); 867 } 868 869 /// Is this 
literal inlinable, and not one of the values intended for floating 870 /// point values. 871 LLVM_READNONE 872 inline bool isInlinableIntLiteral(int64_t Literal) { 873 return Literal >= -16 && Literal <= 64; 874 } 875 876 /// Is this literal inlinable 877 LLVM_READNONE 878 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 879 880 LLVM_READNONE 881 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 882 883 LLVM_READNONE 884 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); 885 886 LLVM_READNONE 887 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); 888 889 LLVM_READNONE 890 bool isInlinableIntLiteralV216(int32_t Literal); 891 892 LLVM_READNONE 893 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); 894 895 bool isArgPassedInSGPR(const Argument *Arg); 896 897 LLVM_READONLY 898 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 899 int64_t EncodedOffset); 900 901 LLVM_READONLY 902 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 903 int64_t EncodedOffset, 904 bool IsBuffer); 905 906 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 907 /// offsets. 908 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 909 910 /// \returns The encoding that will be used for \p ByteOffset in the 911 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10 912 /// S_LOAD instructions have a signed offset, on other subtargets it is 913 /// unsigned. S_BUFFER has an unsigned offset for all subtargets. 914 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 915 int64_t ByteOffset, bool IsBuffer); 916 917 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD 918 /// instruction. This is only useful on CI.s 919 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 920 int64_t ByteOffset); 921 922 /// For FLAT segment the offset must be positive; 923 /// MSB is ignored and forced to zero. 
924 /// 925 /// \return The number of bits available for the offset field in flat 926 /// instructions. 927 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed); 928 929 /// \returns true if this offset is small enough to fit in the SMRD 930 /// offset field. \p ByteOffset should be the offset in bytes and 931 /// not the encoded offset. 932 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); 933 934 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, 935 const GCNSubtarget *Subtarget, 936 Align Alignment = Align(4)); 937 938 LLVM_READNONE 939 inline bool isLegal64BitDPPControl(unsigned DC) { 940 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST; 941 } 942 943 /// \returns true if the intrinsic is divergent 944 bool isIntrinsicSourceOfDivergence(unsigned IntrID); 945 946 // Track defaults for fields in the MODE register. 947 struct SIModeRegisterDefaults { 948 /// Floating point opcodes that support exception flag gathering quiet and 949 /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 950 /// become IEEE 754- 2008 compliant due to signaling NaN propagation and 951 /// quieting. 952 bool IEEE : 1; 953 954 /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, 955 /// clamp NaN to zero; otherwise, pass NaN through. 956 bool DX10Clamp : 1; 957 958 /// If this is set, neither input or output denormals are flushed for most f32 959 /// instructions. 960 bool FP32InputDenormals : 1; 961 bool FP32OutputDenormals : 1; 962 963 /// If this is set, neither input or output denormals are flushed for both f64 964 /// and f16/v2f16 instructions. 
965 bool FP64FP16InputDenormals : 1; 966 bool FP64FP16OutputDenormals : 1; 967 968 SIModeRegisterDefaults() : 969 IEEE(true), 970 DX10Clamp(true), 971 FP32InputDenormals(true), 972 FP32OutputDenormals(true), 973 FP64FP16InputDenormals(true), 974 FP64FP16OutputDenormals(true) {} 975 976 SIModeRegisterDefaults(const Function &F); 977 978 static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { 979 SIModeRegisterDefaults Mode; 980 Mode.IEEE = !AMDGPU::isShader(CC); 981 return Mode; 982 } 983 984 bool operator ==(const SIModeRegisterDefaults Other) const { 985 return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && 986 FP32InputDenormals == Other.FP32InputDenormals && 987 FP32OutputDenormals == Other.FP32OutputDenormals && 988 FP64FP16InputDenormals == Other.FP64FP16InputDenormals && 989 FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; 990 } 991 992 bool allFP32Denormals() const { 993 return FP32InputDenormals && FP32OutputDenormals; 994 } 995 996 bool allFP64FP16Denormals() const { 997 return FP64FP16InputDenormals && FP64FP16OutputDenormals; 998 } 999 1000 /// Get the encoding value for the FP_DENORM bits of the mode register for the 1001 /// FP32 denormal mode. 1002 uint32_t fpDenormModeSPValue() const { 1003 if (FP32InputDenormals && FP32OutputDenormals) 1004 return FP_DENORM_FLUSH_NONE; 1005 if (FP32InputDenormals) 1006 return FP_DENORM_FLUSH_OUT; 1007 if (FP32OutputDenormals) 1008 return FP_DENORM_FLUSH_IN; 1009 return FP_DENORM_FLUSH_IN_FLUSH_OUT; 1010 } 1011 1012 /// Get the encoding value for the FP_DENORM bits of the mode register for the 1013 /// FP64/FP16 denormal mode. 
1014 uint32_t fpDenormModeDPValue() const { 1015 if (FP64FP16InputDenormals && FP64FP16OutputDenormals) 1016 return FP_DENORM_FLUSH_NONE; 1017 if (FP64FP16InputDenormals) 1018 return FP_DENORM_FLUSH_OUT; 1019 if (FP64FP16OutputDenormals) 1020 return FP_DENORM_FLUSH_IN; 1021 return FP_DENORM_FLUSH_IN_FLUSH_OUT; 1022 } 1023 1024 /// Returns true if a flag is compatible if it's enabled in the callee, but 1025 /// disabled in the caller. 1026 static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { 1027 return CallerMode == CalleeMode || (!CallerMode && CalleeMode); 1028 } 1029 1030 // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should 1031 // be able to override. 1032 bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { 1033 if (DX10Clamp != CalleeMode.DX10Clamp) 1034 return false; 1035 if (IEEE != CalleeMode.IEEE) 1036 return false; 1037 1038 // Allow inlining denormals enabled into denormals flushed functions. 1039 return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) && 1040 oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) && 1041 oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) && 1042 oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals); 1043 } 1044 }; 1045 1046 } // end namespace AMDGPU 1047 1048 raw_ostream &operator<<(raw_ostream &OS, 1049 const AMDGPU::IsaInfo::TargetIDSetting S); 1050 1051 } // end namespace llvm 1052 1053 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 1054