//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

// Forward declaration; defined outside of the llvm namespace.
struct amd_kernel_code_t;

namespace llvm {

// Forward declarations of LLVM types named by the declarations in this header.
struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
// NOTE(review): presumably the Optional is empty when the subtarget does not
// target HSA -- confirm against the definition.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI); 56 57 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr 58 unsigned getHostcallImplicitArgPosition(); 59 60 /// \returns Code object version. 61 unsigned getAmdhsaCodeObjectVersion(); 62 63 struct GcnBufferFormatInfo { 64 unsigned Format; 65 unsigned BitsPerComp; 66 unsigned NumComponents; 67 unsigned NumFormat; 68 unsigned DataFormat; 69 }; 70 71 #define GET_MIMGBaseOpcode_DECL 72 #define GET_MIMGDim_DECL 73 #define GET_MIMGEncoding_DECL 74 #define GET_MIMGLZMapping_DECL 75 #define GET_MIMGMIPMapping_DECL 76 #define GET_MIMGBiASMapping_DECL 77 #include "AMDGPUGenSearchableTables.inc" 78 79 namespace IsaInfo { 80 81 enum { 82 // The closed Vulkan driver sets 96, which limits the wave count to 8 but 83 // doesn't spill SGPRs as much as when 80 is set. 84 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96, 85 TRAP_NUM_SGPRS = 16 86 }; 87 88 enum class TargetIDSetting { 89 Unsupported, 90 Any, 91 Off, 92 On 93 }; 94 95 class AMDGPUTargetID { 96 private: 97 const MCSubtargetInfo &STI; 98 TargetIDSetting XnackSetting; 99 TargetIDSetting SramEccSetting; 100 101 public: 102 explicit AMDGPUTargetID(const MCSubtargetInfo &STI); 103 ~AMDGPUTargetID() = default; 104 105 /// \return True if the current xnack setting is not "Unsupported". 106 bool isXnackSupported() const { 107 return XnackSetting != TargetIDSetting::Unsupported; 108 } 109 110 /// \returns True if the current xnack setting is "On" or "Any". 111 bool isXnackOnOrAny() const { 112 return XnackSetting == TargetIDSetting::On || 113 XnackSetting == TargetIDSetting::Any; 114 } 115 116 /// \returns True if current xnack setting is "On" or "Off", 117 /// false otherwise. 118 bool isXnackOnOrOff() const { 119 return getXnackSetting() == TargetIDSetting::On || 120 getXnackSetting() == TargetIDSetting::Off; 121 } 122 123 /// \returns The current xnack TargetIDSetting, possible options are 124 /// "Unsupported", "Any", "Off", and "On". 
125 TargetIDSetting getXnackSetting() const { 126 return XnackSetting; 127 } 128 129 /// Sets xnack setting to \p NewXnackSetting. 130 void setXnackSetting(TargetIDSetting NewXnackSetting) { 131 XnackSetting = NewXnackSetting; 132 } 133 134 /// \return True if the current sramecc setting is not "Unsupported". 135 bool isSramEccSupported() const { 136 return SramEccSetting != TargetIDSetting::Unsupported; 137 } 138 139 /// \returns True if the current sramecc setting is "On" or "Any". 140 bool isSramEccOnOrAny() const { 141 return SramEccSetting == TargetIDSetting::On || 142 SramEccSetting == TargetIDSetting::Any; 143 } 144 145 /// \returns True if current sramecc setting is "On" or "Off", 146 /// false otherwise. 147 bool isSramEccOnOrOff() const { 148 return getSramEccSetting() == TargetIDSetting::On || 149 getSramEccSetting() == TargetIDSetting::Off; 150 } 151 152 /// \returns The current sramecc TargetIDSetting, possible options are 153 /// "Unsupported", "Any", "Off", and "On". 154 TargetIDSetting getSramEccSetting() const { 155 return SramEccSetting; 156 } 157 158 /// Sets sramecc setting to \p NewSramEccSetting. 159 void setSramEccSetting(TargetIDSetting NewSramEccSetting) { 160 SramEccSetting = NewSramEccSetting; 161 } 162 163 void setTargetIDFromFeaturesString(StringRef FS); 164 void setTargetIDFromTargetIDStream(StringRef TargetID); 165 166 /// \returns String representation of an object. 167 std::string toString() const; 168 }; 169 170 /// \returns Wavefront size for given subtarget \p STI. 171 unsigned getWavefrontSize(const MCSubtargetInfo *STI); 172 173 /// \returns Local memory size in bytes for given subtarget \p STI. 174 unsigned getLocalMemorySize(const MCSubtargetInfo *STI); 175 176 /// \returns Number of execution units per compute unit for given subtarget \p 177 /// STI. 
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize for given subtarget \p STI.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
// NOTE(review): \p Addressable presumably limits the result to the addressable
// SGPR count -- confirm against the definition.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
279 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, 280 Optional<bool> EnableWavefrontSize32 = None); 281 282 } // end namespace IsaInfo 283 284 LLVM_READONLY 285 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); 286 287 LLVM_READONLY 288 int getSOPPWithRelaxation(uint16_t Opcode); 289 290 struct MIMGBaseOpcodeInfo { 291 MIMGBaseOpcode BaseOpcode; 292 bool Store; 293 bool Atomic; 294 bool AtomicX2; 295 bool Sampler; 296 bool Gather4; 297 298 uint8_t NumExtraArgs; 299 bool Gradients; 300 bool G16; 301 bool Coordinates; 302 bool LodOrClampOrMip; 303 bool HasD16; 304 bool MSAA; 305 bool BVH; 306 }; 307 308 LLVM_READONLY 309 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc); 310 311 LLVM_READONLY 312 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode); 313 314 struct MIMGDimInfo { 315 MIMGDim Dim; 316 uint8_t NumCoords; 317 uint8_t NumGradients; 318 bool MSAA; 319 bool DA; 320 uint8_t Encoding; 321 const char *AsmSuffix; 322 }; 323 324 LLVM_READONLY 325 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum); 326 327 LLVM_READONLY 328 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc); 329 330 LLVM_READONLY 331 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix); 332 333 struct MIMGLZMappingInfo { 334 MIMGBaseOpcode L; 335 MIMGBaseOpcode LZ; 336 }; 337 338 struct MIMGMIPMappingInfo { 339 MIMGBaseOpcode MIP; 340 MIMGBaseOpcode NONMIP; 341 }; 342 343 struct MIMGBiasMappingInfo { 344 MIMGBaseOpcode Bias; 345 MIMGBaseOpcode NoBias; 346 }; 347 348 struct MIMGOffsetMappingInfo { 349 MIMGBaseOpcode Offset; 350 MIMGBaseOpcode NoOffset; 351 }; 352 353 struct MIMGG16MappingInfo { 354 MIMGBaseOpcode G; 355 MIMGBaseOpcode G16; 356 }; 357 358 LLVM_READONLY 359 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L); 360 361 LLVM_READONLY 362 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP); 363 364 LLVM_READONLY 365 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias); 366 
LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

/// Associates a MIMG opcode with its base opcode, encoding and the dword
/// counts of its vdata and vaddr operands.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

// MTBUF opcode queries.
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

// MUBUF opcode queries.
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

// Buffer format lookups, either by component layout or by format value.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
499 struct Waitcnt { 500 unsigned VmCnt = ~0u; 501 unsigned ExpCnt = ~0u; 502 unsigned LgkmCnt = ~0u; 503 unsigned VsCnt = ~0u; 504 505 Waitcnt() = default; 506 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt) 507 : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {} 508 509 static Waitcnt allZero(bool HasVscnt) { 510 return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u); 511 } 512 static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); } 513 514 bool hasWait() const { 515 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u; 516 } 517 518 bool hasWaitExceptVsCnt() const { 519 return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u; 520 } 521 522 bool hasWaitVsCnt() const { 523 return VsCnt != ~0u; 524 } 525 526 bool dominates(const Waitcnt &Other) const { 527 return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt && 528 LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt; 529 } 530 531 Waitcnt combined(const Waitcnt &Other) const { 532 return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt), 533 std::min(LgkmCnt, Other.LgkmCnt), 534 std::min(VsCnt, Other.VsCnt)); 535 } 536 }; 537 538 /// \returns Vmcnt bit mask for given isa \p Version. 539 unsigned getVmcntBitMask(const IsaVersion &Version); 540 541 /// \returns Expcnt bit mask for given isa \p Version. 542 unsigned getExpcntBitMask(const IsaVersion &Version); 543 544 /// \returns Lgkmcnt bit mask for given isa \p Version. 545 unsigned getLgkmcntBitMask(const IsaVersion &Version); 546 547 /// \returns Waitcnt bit mask for given isa \p Version. 548 unsigned getWaitcntBitMask(const IsaVersion &Version); 549 550 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version. 551 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt); 552 553 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version. 
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

// Overload that takes the encoded value \p Encoded and returns the decoded
// counters as a Waitcnt struct.
// NOTE(review): VsCnt is presumably left at its "no wait" default because it
// is not part of the encoding documented above -- confirm against the
// definition.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

// Overload that takes the counters to encode from the Waitcnt struct
// \p Decoded.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);

// Helpers for hardware register operands: name/id lookup, field validation
// and encoding/decoding of the (Id, Offset, Width) triple.
namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg

// Helpers for export targets: name/id lookup and per-subtarget support check.
namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

// Helpers for MTBUF format operands (dfmt/nfmt and unified formats).
namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

// Helpers for sendmsg operands: name/id lookup, validation and
// encoding/decoding of the (MsgId, OpId, StreamId) triple.
namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg


unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

// Calling-convention classification predicates.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE 733 bool isEntryFunctionCC(CallingConv::ID CC); 734 735 // These functions are considered entrypoints into the current module, i.e. they 736 // are allowed to be called from outside the current module. This is different 737 // from isEntryFunctionCC, which is only true for functions that are entered by 738 // the hardware. Module entry points include all entry functions but also 739 // include functions that can be called from other functions inside or outside 740 // the current module. Module entry functions are allowed to allocate LDS. 741 LLVM_READNONE 742 bool isModuleEntryFunctionCC(CallingConv::ID CC); 743 744 bool isKernelCC(const Function *Func); 745 746 // FIXME: Remove this when calling conventions cleaned up 747 LLVM_READNONE 748 inline bool isKernel(CallingConv::ID CC) { 749 switch (CC) { 750 case CallingConv::AMDGPU_KERNEL: 751 case CallingConv::SPIR_KERNEL: 752 return true; 753 default: 754 return false; 755 } 756 } 757 758 bool hasXNACK(const MCSubtargetInfo &STI); 759 bool hasSRAMECC(const MCSubtargetInfo &STI); 760 bool hasMIMG_R128(const MCSubtargetInfo &STI); 761 bool hasGFX10A16(const MCSubtargetInfo &STI); 762 bool hasG16(const MCSubtargetInfo &STI); 763 bool hasPackedD16(const MCSubtargetInfo &STI); 764 765 bool isSI(const MCSubtargetInfo &STI); 766 bool isCI(const MCSubtargetInfo &STI); 767 bool isVI(const MCSubtargetInfo &STI); 768 bool isGFX9(const MCSubtargetInfo &STI); 769 bool isGFX9_GFX10(const MCSubtargetInfo &STI); 770 bool isGFX8Plus(const MCSubtargetInfo &STI); 771 bool isGFX9Plus(const MCSubtargetInfo &STI); 772 bool isGFX10(const MCSubtargetInfo &STI); 773 bool isGFX10Plus(const MCSubtargetInfo &STI); 774 bool isNotGFX10Plus(const MCSubtargetInfo &STI); 775 bool isGFX10Before1030(const MCSubtargetInfo &STI); 776 bool isGCN3Encoding(const MCSubtargetInfo &STI); 777 bool isGFX10_AEncoding(const MCSubtargetInfo &STI); 778 bool isGFX10_BEncoding(const MCSubtargetInfo &STI); 779 bool hasGFX10_3Insts(const MCSubtargetInfo 
&STI); 780 bool isGFX90A(const MCSubtargetInfo &STI); 781 bool isGFX940(const MCSubtargetInfo &STI); 782 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI); 783 bool hasMAIInsts(const MCSubtargetInfo &STI); 784 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); 785 786 /// Is Reg - scalar register 787 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); 788 789 /// If \p Reg is a pseudo reg, return the correct hardware register given 790 /// \p STI otherwise return \p Reg. 791 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); 792 793 /// Convert hardware register \p Reg to a pseudo register 794 LLVM_READNONE 795 unsigned mc2PseudoReg(unsigned Reg); 796 797 /// Can this operand also contain immediate values? 798 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo); 799 800 /// Is this floating-point operand? 801 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo); 802 803 /// Does this operand support only inlinable literals? 804 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo); 805 806 /// Get the size in bits of a register from the register class \p RC. 807 unsigned getRegBitWidth(unsigned RCID); 808 809 /// Get the size in bits of a register from the register class \p RC. 
810 unsigned getRegBitWidth(const MCRegisterClass &RC); 811 812 /// Get size of register operand 813 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc, 814 unsigned OpNo); 815 816 LLVM_READNONE 817 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) { 818 switch (OpInfo.OperandType) { 819 case AMDGPU::OPERAND_REG_IMM_INT32: 820 case AMDGPU::OPERAND_REG_IMM_FP32: 821 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 822 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 823 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 824 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 825 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 826 case AMDGPU::OPERAND_REG_IMM_V2INT32: 827 case AMDGPU::OPERAND_REG_IMM_V2FP32: 828 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 829 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 830 case AMDGPU::OPERAND_KIMM32: 831 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4 832 return 4; 833 834 case AMDGPU::OPERAND_REG_IMM_INT64: 835 case AMDGPU::OPERAND_REG_IMM_FP64: 836 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 837 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 838 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 839 return 8; 840 841 case AMDGPU::OPERAND_REG_IMM_INT16: 842 case AMDGPU::OPERAND_REG_IMM_FP16: 843 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 844 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 845 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 846 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 848 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 849 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 850 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 851 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 852 case AMDGPU::OPERAND_REG_IMM_V2INT16: 853 case AMDGPU::OPERAND_REG_IMM_V2FP16: 854 return 2; 855 856 default: 857 llvm_unreachable("unhandled operand type"); 858 } 859 } 860 861 LLVM_READNONE 862 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) { 863 return getOperandSize(Desc.OpInfo[OpNo]); 864 } 865 866 /// Is this 
literal inlinable, and not one of the values intended for floating 867 /// point values. 868 LLVM_READNONE 869 inline bool isInlinableIntLiteral(int64_t Literal) { 870 return Literal >= -16 && Literal <= 64; 871 } 872 873 /// Is this literal inlinable 874 LLVM_READNONE 875 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi); 876 877 LLVM_READNONE 878 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi); 879 880 LLVM_READNONE 881 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi); 882 883 LLVM_READNONE 884 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi); 885 886 LLVM_READNONE 887 bool isInlinableIntLiteralV216(int32_t Literal); 888 889 LLVM_READNONE 890 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi); 891 892 bool isArgPassedInSGPR(const Argument *Arg); 893 894 LLVM_READONLY 895 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, 896 int64_t EncodedOffset); 897 898 LLVM_READONLY 899 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, 900 int64_t EncodedOffset, 901 bool IsBuffer); 902 903 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate 904 /// offsets. 905 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); 906 907 /// \returns The encoding that will be used for \p ByteOffset in the 908 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10 909 /// S_LOAD instructions have a signed offset, on other subtargets it is 910 /// unsigned. S_BUFFER has an unsigned offset for all subtargets. 911 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, 912 int64_t ByteOffset, bool IsBuffer); 913 914 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD 915 /// instruction. This is only useful on CI.s 916 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, 917 int64_t ByteOffset); 918 919 /// For FLAT segment the offset must be positive; 920 /// MSB is ignored and forced to zero. 
921 /// 922 /// \return The number of bits available for the offset field in flat 923 /// instructions. 924 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed); 925 926 /// \returns true if this offset is small enough to fit in the SMRD 927 /// offset field. \p ByteOffset should be the offset in bytes and 928 /// not the encoded offset. 929 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset); 930 931 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, 932 const GCNSubtarget *Subtarget, 933 Align Alignment = Align(4)); 934 935 LLVM_READNONE 936 inline bool isLegal64BitDPPControl(unsigned DC) { 937 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST; 938 } 939 940 /// \returns true if the intrinsic is divergent 941 bool isIntrinsicSourceOfDivergence(unsigned IntrID); 942 943 // Track defaults for fields in the MODE register. 944 struct SIModeRegisterDefaults { 945 /// Floating point opcodes that support exception flag gathering quiet and 946 /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 947 /// become IEEE 754- 2008 compliant due to signaling NaN propagation and 948 /// quieting. 949 bool IEEE : 1; 950 951 /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, 952 /// clamp NaN to zero; otherwise, pass NaN through. 953 bool DX10Clamp : 1; 954 955 /// If this is set, neither input or output denormals are flushed for most f32 956 /// instructions. 957 bool FP32InputDenormals : 1; 958 bool FP32OutputDenormals : 1; 959 960 /// If this is set, neither input or output denormals are flushed for both f64 961 /// and f16/v2f16 instructions. 
962 bool FP64FP16InputDenormals : 1; 963 bool FP64FP16OutputDenormals : 1; 964 965 SIModeRegisterDefaults() : 966 IEEE(true), 967 DX10Clamp(true), 968 FP32InputDenormals(true), 969 FP32OutputDenormals(true), 970 FP64FP16InputDenormals(true), 971 FP64FP16OutputDenormals(true) {} 972 973 SIModeRegisterDefaults(const Function &F); 974 975 static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { 976 SIModeRegisterDefaults Mode; 977 Mode.IEEE = !AMDGPU::isShader(CC); 978 return Mode; 979 } 980 981 bool operator ==(const SIModeRegisterDefaults Other) const { 982 return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && 983 FP32InputDenormals == Other.FP32InputDenormals && 984 FP32OutputDenormals == Other.FP32OutputDenormals && 985 FP64FP16InputDenormals == Other.FP64FP16InputDenormals && 986 FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; 987 } 988 989 bool allFP32Denormals() const { 990 return FP32InputDenormals && FP32OutputDenormals; 991 } 992 993 bool allFP64FP16Denormals() const { 994 return FP64FP16InputDenormals && FP64FP16OutputDenormals; 995 } 996 997 /// Get the encoding value for the FP_DENORM bits of the mode register for the 998 /// FP32 denormal mode. 999 uint32_t fpDenormModeSPValue() const { 1000 if (FP32InputDenormals && FP32OutputDenormals) 1001 return FP_DENORM_FLUSH_NONE; 1002 if (FP32InputDenormals) 1003 return FP_DENORM_FLUSH_OUT; 1004 if (FP32OutputDenormals) 1005 return FP_DENORM_FLUSH_IN; 1006 return FP_DENORM_FLUSH_IN_FLUSH_OUT; 1007 } 1008 1009 /// Get the encoding value for the FP_DENORM bits of the mode register for the 1010 /// FP64/FP16 denormal mode. 
1011 uint32_t fpDenormModeDPValue() const { 1012 if (FP64FP16InputDenormals && FP64FP16OutputDenormals) 1013 return FP_DENORM_FLUSH_NONE; 1014 if (FP64FP16InputDenormals) 1015 return FP_DENORM_FLUSH_OUT; 1016 if (FP64FP16OutputDenormals) 1017 return FP_DENORM_FLUSH_IN; 1018 return FP_DENORM_FLUSH_IN_FLUSH_OUT; 1019 } 1020 1021 /// Returns true if a flag is compatible if it's enabled in the callee, but 1022 /// disabled in the caller. 1023 static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { 1024 return CallerMode == CalleeMode || (!CallerMode && CalleeMode); 1025 } 1026 1027 // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should 1028 // be able to override. 1029 bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { 1030 if (DX10Clamp != CalleeMode.DX10Clamp) 1031 return false; 1032 if (IEEE != CalleeMode.IEEE) 1033 return false; 1034 1035 // Allow inlining denormals enabled into denormals flushed functions. 1036 return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) && 1037 oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) && 1038 oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) && 1039 oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals); 1040 } 1041 }; 1042 1043 } // end namespace AMDGPU 1044 1045 raw_ostream &operator<<(raw_ostream &OS, 1046 const AMDGPU::IsaInfo::TargetIDSetting S); 1047 1048 } // end namespace llvm 1049 1050 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H 1051