//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class raw_ostream;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
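
// Usage sketch (illustrative only; assumes STI points to a valid
// MCSubtargetInfo for an AMDGPU target, obtained elsewhere):
//
//   if (Optional<uint8_t> AbiVer = AMDGPU::getHsaAbiVersion(STI)) {
//     // *AbiVer is 2, 3 or 4, e.g. as selected by
//     // --amdhsa-code-object-version.
//   }
//   if (AMDGPU::isHsaAbiVersion3Or4(STI)) {
//     // Code object v3+ conventions (MsgPack metadata) apply.
//   }
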
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};

class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \returns True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \returns True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of this object.
  std::string toString() const;
};
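
// Usage sketch (illustrative; assumes STI describes a processor that supports
// xnack and sramecc, e.g. gfx906):
//
//   IsaInfo::AMDGPUTargetID TargetID(*STI);
//   TargetID.setTargetIDFromFeaturesString("+xnack,-sramecc");
//   if (TargetID.isXnackOnOrAny()) {
//     // TargetID.toString() yields something like "gfx906:sramecc-:xnack+".
//   }
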
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
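
// Usage sketch (illustrative; NumAllocatedSGPRs stands for a count produced
// by register allocation):
//
//   unsigned NumSGPRs = NumAllocatedSGPRs +
//       getNumExtraSGPRs(STI, /*VCCUsed=*/true, /*FlatScrUsed=*/false);
//   unsigned SGPRBlocks = getNumSGPRBlocks(STI, NumSGPRs);
//   // SGPRBlocks is the granulated count that kernel descriptor fields such
//   // as GRANULATED_WAVEFRONT_SGPR_COUNT expect.
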
/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);
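
// Usage sketch (illustrative; Opc stands for a machine opcode that may or may
// not belong to the MIMG family):
//
//   if (const MIMGInfo *Info = getMIMGInfo(Opc)) {
//     const MIMGBaseOpcodeInfo *Base = getMIMGBaseOpcodeInfo(Info->BaseOpcode);
//     if (Base->Sampler && !Base->Gather4) {
//       // An image sample instruction (not a gather4 variant).
//     }
//   }
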
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is only required if
/// \p OnlyFirstRequired is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot
/// be converted to integer, or if \p OnlyFirstRequired is false and the
/// "second" value is not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
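
// Usage sketch (illustrative): merging the wait requirements of two program
// points into one conservative wait.
//
//   Waitcnt A = Waitcnt::allZeroExceptVsCnt(); // drain vm/exp/lgkm counters
//   Waitcnt B;                                 // all "don't care"
//   Waitcnt Merged = A.combined(B);            // component-wise minimum
//   // Merged.dominates(A) and Merged.dominates(B) both hold.
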
/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]            (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]      (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]         (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]         (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt             (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]        (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt           (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt           (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]        (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
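
// Usage sketch (illustrative; Version would come from
// AMDGPU::getIsaVersion(STI->getCPU()), declared in
// llvm/Support/TargetParser.h):
//
//   unsigned Enc = encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7,
//                                /*Lgkmcnt=*/0);
//   unsigned Vm, Exp, Lgkm;
//   decodeWaitcnt(Version, Enc, Vm, Exp, Lgkm);
//   // Vm == 0, Exp == 7, Lgkm == 0: values round-trip within field widths.
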
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
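
// Usage sketch (illustrative): building an s_sendmsg immediate by name, with
// validation against the subtarget. Name-based lookup returns a negative
// value when the name is unknown.
//
//   using namespace SendMsg;
//   int64_t MsgId = getMsgId("MSG_GS");
//   int64_t OpId = getMsgOpId(MsgId, "GS_OP_EMIT");
//   if (MsgId >= 0 && OpId >= 0 &&
//       isValidMsgStream(MsgId, OpId, /*StreamId=*/0, *STI)) {
//     uint64_t Imm = encodeMsg(MsgId, OpId, /*StreamId=*/0);
//   }
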
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entry points into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI);

/// Is there any intersection between registers \p Reg0 and \p Reg1?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo *TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class with ID
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
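
// Usage sketch (illustrative; Reg is any AMDGPU register enum value and TRI
// the target's MCRegisterInfo):
//
//   if (isSGPR(mc2PseudoReg(Reg), TRI)) {
//     // The operand lives in the scalar register file and is uniform
//     // across the wave.
//   }
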
/// Get size of register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for floating
/// point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);
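
// Usage sketch (illustrative; ByteOffset is a byte offset computed by address
// folding):
//
//   if (Optional<int64_t> Enc =
//           getSMRDEncodedOffset(*STI, ByteOffset, /*IsBuffer=*/false)) {
//     // *Enc can be placed directly into the s_load offset field.
//   }
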
/// \returns The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \returns The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns True if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns True if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
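
// Usage sketch (illustrative; DC stands for a raw dpp_ctrl immediate on an
// instruction with 64-bit operands):
//
//   if (!isLegal64BitDPPControl(DC)) {
//     // Reject: per this predicate, only the row_newbcast controls are
//     // legal for 64-bit DPP.
//   }
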
// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator ==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// \returns True if a mode flag is one-way compatible: either the caller
  /// and callee agree, or the flag is disabled in the caller but enabled in
  /// the callee.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H