1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15 
16 struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
// Forward declarations used by the interfaces declared in this header.
struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

// Kernel descriptor type returned by getDefaultAmdhsaKernelDescriptor().
namespace amdhsa {
struct kernel_descriptor_t;
} // namespace amdhsa
34 
35 namespace AMDGPU {
36 
37 struct IsaVersion;
38 
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 4,
48 /// false otherwise.
49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 5,
51 /// false otherwise.
52 bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
53 /// \returns True if HSA OS ABI Version identification is 3 and above,
54 /// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
56 
/// \returns Offset of the multigrid_sync_arg argument relative to
/// implicitarg_ptr.
unsigned getMultigridSyncArgImplicitArgPosition();

/// \returns Offset of the hostcall pointer argument relative to
/// implicitarg_ptr.
unsigned getHostcallImplicitArgPosition();

/// \returns The code object version.
unsigned getAmdhsaCodeObjectVersion();
65 
/// Record describing one GCN buffer format; pointers to these records are
/// handed out by the getGcnBufferFormatInfo() lookups.
///
/// NOTE(review): the member order presumably has to match the generated
/// searchable table rows - confirm before reordering.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
73 
/// Per-opcode flags for MAI instructions; the flags are exposed through
/// getMAIIsDGEMM() and getMAIIsGFX940XDL().
///
/// NOTE(review): the member order presumably has to match the generated
/// searchable table rows - confirm before reordering.
struct MAIInstInfo {
  uint16_t Opcode;
  bool is_dgemm;
  bool is_gfx940_xdl;
};
79 
80 #define GET_MIMGBaseOpcode_DECL
81 #define GET_MIMGDim_DECL
82 #define GET_MIMGEncoding_DECL
83 #define GET_MIMGLZMapping_DECL
84 #define GET_MIMGMIPMapping_DECL
85 #define GET_MIMGBiASMapping_DECL
86 #define GET_MAIInstInfoTable_DECL
87 #include "AMDGPUGenSearchableTables.inc"
88 
89 namespace IsaInfo {
90 
enum {
  TRAP_NUM_SGPRS = 16,
  // 96 is the value the closed Vulkan driver sets: it caps the wave count at 8
  // but spills SGPRs less than a setting of 80 would.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
};
97 
/// State of a single target ID feature (used for both xnack and sramecc by
/// AMDGPUTargetID).
enum class TargetIDSetting {
  Unsupported = 0,
  Any = 1,
  Off = 2,
  On = 3
};
104 
105 class AMDGPUTargetID {
106 private:
107   const MCSubtargetInfo &STI;
108   TargetIDSetting XnackSetting;
109   TargetIDSetting SramEccSetting;
110 
111 public:
112   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
113   ~AMDGPUTargetID() = default;
114 
115   /// \return True if the current xnack setting is not "Unsupported".
116   bool isXnackSupported() const {
117     return XnackSetting != TargetIDSetting::Unsupported;
118   }
119 
120   /// \returns True if the current xnack setting is "On" or "Any".
121   bool isXnackOnOrAny() const {
122     return XnackSetting == TargetIDSetting::On ||
123         XnackSetting == TargetIDSetting::Any;
124   }
125 
126   /// \returns True if current xnack setting is "On" or "Off",
127   /// false otherwise.
128   bool isXnackOnOrOff() const {
129     return getXnackSetting() == TargetIDSetting::On ||
130         getXnackSetting() == TargetIDSetting::Off;
131   }
132 
133   /// \returns The current xnack TargetIDSetting, possible options are
134   /// "Unsupported", "Any", "Off", and "On".
135   TargetIDSetting getXnackSetting() const {
136     return XnackSetting;
137   }
138 
139   /// Sets xnack setting to \p NewXnackSetting.
140   void setXnackSetting(TargetIDSetting NewXnackSetting) {
141     XnackSetting = NewXnackSetting;
142   }
143 
144   /// \return True if the current sramecc setting is not "Unsupported".
145   bool isSramEccSupported() const {
146     return SramEccSetting != TargetIDSetting::Unsupported;
147   }
148 
149   /// \returns True if the current sramecc setting is "On" or "Any".
150   bool isSramEccOnOrAny() const {
151   return SramEccSetting == TargetIDSetting::On ||
152       SramEccSetting == TargetIDSetting::Any;
153   }
154 
155   /// \returns True if current sramecc setting is "On" or "Off",
156   /// false otherwise.
157   bool isSramEccOnOrOff() const {
158     return getSramEccSetting() == TargetIDSetting::On ||
159         getSramEccSetting() == TargetIDSetting::Off;
160   }
161 
162   /// \returns The current sramecc TargetIDSetting, possible options are
163   /// "Unsupported", "Any", "Off", and "On".
164   TargetIDSetting getSramEccSetting() const {
165     return SramEccSetting;
166   }
167 
168   /// Sets sramecc setting to \p NewSramEccSetting.
169   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
170     SramEccSetting = NewSramEccSetting;
171   }
172 
173   void setTargetIDFromFeaturesString(StringRef FS);
174   void setTargetIDFromTargetIDStream(StringRef TargetID);
175 
176   /// \returns String representation of an object.
177   std::string toString() const;
178 };
179 
180 /// \returns Wavefront size for given subtarget \p STI.
181 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
182 
183 /// \returns Local memory size in bytes for given subtarget \p STI.
184 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
185 
186 /// \returns Number of execution units per compute unit for given subtarget \p
187 /// STI.
188 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
189 
190 /// \returns Maximum number of work groups per compute unit for given subtarget
191 /// \p STI and limited by given \p FlatWorkGroupSize.
192 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
193                                unsigned FlatWorkGroupSize);
194 
195 /// \returns Minimum number of waves per execution unit for given subtarget \p
196 /// STI.
197 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
198 
199 /// \returns Maximum number of waves per execution unit for given subtarget \p
200 /// STI without any kind of limitation.
201 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
202 
203 /// \returns Number of waves per execution unit required to support the given \p
204 /// FlatWorkGroupSize.
205 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
206                                    unsigned FlatWorkGroupSize);
207 
208 /// \returns Minimum flat work group size for given subtarget \p STI.
209 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
210 
211 /// \returns Maximum flat work group size for given subtarget \p STI.
212 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
213 
214 /// \returns Number of waves per work group for given subtarget \p STI and
215 /// \p FlatWorkGroupSize.
216 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
217                               unsigned FlatWorkGroupSize);
218 
219 /// \returns SGPR allocation granularity for given subtarget \p STI.
220 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
221 
222 /// \returns SGPR encoding granularity for given subtarget \p STI.
223 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
224 
225 /// \returns Total number of SGPRs for given subtarget \p STI.
226 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
227 
228 /// \returns Addressable number of SGPRs for given subtarget \p STI.
229 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
230 
231 /// \returns Minimum number of SGPRs that meets the given number of waves per
232 /// execution unit requirement for given subtarget \p STI.
233 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
234 
235 /// \returns Maximum number of SGPRs that meets the given number of waves per
236 /// execution unit requirement for given subtarget \p STI.
237 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
238                         bool Addressable);
239 
240 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
241 /// STI when the given special registers are used.
242 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
243                           bool FlatScrUsed, bool XNACKUsed);
244 
245 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
246 /// STI when the given special registers are used. XNACK is inferred from
247 /// \p STI.
248 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
249                           bool FlatScrUsed);
250 
251 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
252 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
253 /// register counts.
254 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
255 
256 /// \returns VGPR allocation granularity for given subtarget \p STI.
257 ///
258 /// For subtargets which support it, \p EnableWavefrontSize32 should match
259 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
260 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
261                              Optional<bool> EnableWavefrontSize32 = None);
262 
263 /// \returns VGPR encoding granularity for given subtarget \p STI.
264 ///
265 /// For subtargets which support it, \p EnableWavefrontSize32 should match
266 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
267 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
268                                 Optional<bool> EnableWavefrontSize32 = None);
269 
270 /// \returns Total number of VGPRs for given subtarget \p STI.
271 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
272 
273 /// \returns Addressable number of VGPRs for given subtarget \p STI.
274 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
275 
276 /// \returns Minimum number of VGPRs that meets given number of waves per
277 /// execution unit requirement for given subtarget \p STI.
278 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
279 
280 /// \returns Maximum number of VGPRs that meets given number of waves per
281 /// execution unit requirement for given subtarget \p STI.
282 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
283 
284 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
285 /// \p NumVGPRs are used.
286 ///
287 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
288 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
289 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
290                           Optional<bool> EnableWavefrontSize32 = None);
291 
292 } // end namespace IsaInfo
293 
294 LLVM_READONLY
295 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
296 
297 LLVM_READONLY
298 int getSOPPWithRelaxation(uint16_t Opcode);
299 
300 struct MIMGBaseOpcodeInfo {
301   MIMGBaseOpcode BaseOpcode;
302   bool Store;
303   bool Atomic;
304   bool AtomicX2;
305   bool Sampler;
306   bool Gather4;
307 
308   uint8_t NumExtraArgs;
309   bool Gradients;
310   bool G16;
311   bool Coordinates;
312   bool LodOrClampOrMip;
313   bool HasD16;
314   bool MSAA;
315   bool BVH;
316 };
317 
318 LLVM_READONLY
319 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
320 
321 LLVM_READONLY
322 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
323 
324 struct MIMGDimInfo {
325   MIMGDim Dim;
326   uint8_t NumCoords;
327   uint8_t NumGradients;
328   bool MSAA;
329   bool DA;
330   uint8_t Encoding;
331   const char *AsmSuffix;
332 };
333 
334 LLVM_READONLY
335 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
336 
337 LLVM_READONLY
338 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
339 
340 LLVM_READONLY
341 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
342 
343 struct MIMGLZMappingInfo {
344   MIMGBaseOpcode L;
345   MIMGBaseOpcode LZ;
346 };
347 
348 struct MIMGMIPMappingInfo {
349   MIMGBaseOpcode MIP;
350   MIMGBaseOpcode NONMIP;
351 };
352 
353 struct MIMGBiasMappingInfo {
354   MIMGBaseOpcode Bias;
355   MIMGBaseOpcode NoBias;
356 };
357 
358 struct MIMGOffsetMappingInfo {
359   MIMGBaseOpcode Offset;
360   MIMGBaseOpcode NoOffset;
361 };
362 
363 struct MIMGG16MappingInfo {
364   MIMGBaseOpcode G;
365   MIMGBaseOpcode G16;
366 };
367 
368 LLVM_READONLY
369 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
370 
371 LLVM_READONLY
372 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
373 
374 LLVM_READONLY
375 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
376 
377 LLVM_READONLY
378 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
379 
380 LLVM_READONLY
381 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
382 
383 LLVM_READONLY
384 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
385                   unsigned VDataDwords, unsigned VAddrDwords);
386 
387 LLVM_READONLY
388 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
389 
390 LLVM_READONLY
391 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
392                            const MIMGDimInfo *Dim, bool IsA16,
393                            bool IsG16Supported);
394 
/// Per-opcode MIMG record, as handed out by getMIMGInfo().
///
/// NOTE(review): the member order presumably has to match the generated
/// searchable table rows - confirm before reordering.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
403 
404 LLVM_READONLY
405 const MIMGInfo *getMIMGInfo(unsigned Opc);
406 
407 LLVM_READONLY
408 int getMTBUFBaseOpcode(unsigned Opc);
409 
410 LLVM_READONLY
411 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
412 
413 LLVM_READONLY
414 int getMTBUFElements(unsigned Opc);
415 
416 LLVM_READONLY
417 bool getMTBUFHasVAddr(unsigned Opc);
418 
419 LLVM_READONLY
420 bool getMTBUFHasSrsrc(unsigned Opc);
421 
422 LLVM_READONLY
423 bool getMTBUFHasSoffset(unsigned Opc);
424 
425 LLVM_READONLY
426 int getMUBUFBaseOpcode(unsigned Opc);
427 
428 LLVM_READONLY
429 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
430 
431 LLVM_READONLY
432 int getMUBUFElements(unsigned Opc);
433 
434 LLVM_READONLY
435 bool getMUBUFHasVAddr(unsigned Opc);
436 
437 LLVM_READONLY
438 bool getMUBUFHasSrsrc(unsigned Opc);
439 
440 LLVM_READONLY
441 bool getMUBUFHasSoffset(unsigned Opc);
442 
443 LLVM_READONLY
444 bool getMUBUFIsBufferInv(unsigned Opc);
445 
446 LLVM_READONLY
447 bool getSMEMIsBuffer(unsigned Opc);
448 
449 LLVM_READONLY
450 bool getVOP1IsSingle(unsigned Opc);
451 
452 LLVM_READONLY
453 bool getVOP2IsSingle(unsigned Opc);
454 
455 LLVM_READONLY
456 bool getVOP3IsSingle(unsigned Opc);
457 
458 LLVM_READONLY
459 bool isVOPC64DPP(unsigned Opc);
460 
461 /// Returns true if MAI operation is a double precision GEMM.
462 LLVM_READONLY
463 bool getMAIIsDGEMM(unsigned Opc);
464 
465 LLVM_READONLY
466 bool getMAIIsGFX940XDL(unsigned Opc);
467 
468 LLVM_READONLY
469 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
470                                                   uint8_t NumComponents,
471                                                   uint8_t NumFormat,
472                                                   const MCSubtargetInfo &STI);
473 LLVM_READONLY
474 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
475                                                   const MCSubtargetInfo &STI);
476 
477 LLVM_READONLY
478 int getMCOpcode(uint16_t Opcode, unsigned Gen);
479 
480 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
481                                const MCSubtargetInfo *STI);
482 
483 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
484     const MCSubtargetInfo *STI);
485 
486 bool isGroupSegment(const GlobalValue *GV);
487 bool isGlobalSegment(const GlobalValue *GV);
488 bool isReadOnlySegment(const GlobalValue *GV);
489 
490 /// \returns True if constants should be emitted to .text section for given
491 /// target triple \p TT, false otherwise.
492 bool shouldEmitConstantsToTextSection(const Triple &TT);
493 
494 /// \returns Integer value requested using \p F's \p Name attribute.
495 ///
496 /// \returns \p Default if attribute is not present.
497 ///
498 /// \returns \p Default and emits error if requested value cannot be converted
499 /// to integer.
500 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
501 
502 /// \returns A pair of integer values requested using \p F's \p Name attribute
503 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
504 /// is false).
505 ///
506 /// \returns \p Default if attribute is not present.
507 ///
508 /// \returns \p Default and emits error if one of the requested values cannot be
509 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
510 /// not present.
511 std::pair<int, int> getIntegerPairAttribute(const Function &F,
512                                             StringRef Name,
513                                             std::pair<int, int> Default,
514                                             bool OnlyFirstRequired = false);
515 
/// Holds the counter values an s_waitcnt instruction waits for.
///
/// Large values (up to and including the maximum possible integer) stand for
/// "don't care" waits. Every counter defaults to ~0u, and the hasWait*()
/// queries compare against exactly that value.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt at zero and VsCnt left at
  /// ~0u.
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt at zero. VsCnt is zero
  /// when \p HasVscnt is true and ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt Result = allZeroExceptVsCnt();
    if (HasVscnt)
      Result.VsCnt = 0;
    return Result;
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return VsCnt != ~0u; }

  /// \returns True if any of VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  /// \returns True if any of the four counters differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  /// \returns True if every counter of this wait is less than or equal to the
  /// corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt || ExpCnt > Other.ExpCnt)
      return false;
    return LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  /// \returns A wait holding, per counter, the smaller of this wait's value
  /// and the value in \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged;
    Merged.VmCnt = std::min(VmCnt, Other.VmCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Merged.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Merged;
  }
};
558 
559 /// \returns Vmcnt bit mask for given isa \p Version.
560 unsigned getVmcntBitMask(const IsaVersion &Version);
561 
562 /// \returns Expcnt bit mask for given isa \p Version.
563 unsigned getExpcntBitMask(const IsaVersion &Version);
564 
565 /// \returns Lgkmcnt bit mask for given isa \p Version.
566 unsigned getLgkmcntBitMask(const IsaVersion &Version);
567 
568 /// \returns Waitcnt bit mask for given isa \p Version.
569 unsigned getWaitcntBitMask(const IsaVersion &Version);
570 
571 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
572 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
573 
574 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
575 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
576 
577 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
578 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
579 
580 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
581 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
582 /// \p Lgkmcnt respectively.
583 ///
584 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
585 ///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
586 ///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
587 ///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11+)
588 ///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
589 ///     \p Expcnt = \p Waitcnt[2:0]       (gfx11+)
590 ///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
591 ///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
592 ///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11+)
593 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
594                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
595 
596 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
597 
598 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
599 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
600                      unsigned Vmcnt);
601 
602 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
603 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
604                       unsigned Expcnt);
605 
606 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
607 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
608                        unsigned Lgkmcnt);
609 
610 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
611 /// \p Version.
612 ///
613 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
614 ///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
615 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
616 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
617 ///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
618 ///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11+)
619 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
620 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
621 ///     Waitcnt[15:10] = \p Vmcnt       (gfx11+)
622 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
623 ///
624 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
625 /// isa \p Version.
626 unsigned encodeWaitcnt(const IsaVersion &Version,
627                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
628 
629 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
630 
631 namespace Hwreg {
632 
633 LLVM_READONLY
634 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
635 
636 LLVM_READNONE
637 bool isValidHwreg(int64_t Id);
638 
639 LLVM_READNONE
640 bool isValidHwregOffset(int64_t Offset);
641 
642 LLVM_READNONE
643 bool isValidHwregWidth(int64_t Width);
644 
645 LLVM_READNONE
646 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
647 
648 LLVM_READNONE
649 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
650 
651 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
652 
653 } // namespace Hwreg
654 
655 namespace DepCtr {
656 
657 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
658 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
659                  const MCSubtargetInfo &STI);
660 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
661                               const MCSubtargetInfo &STI);
662 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
663                   bool &IsDefault, const MCSubtargetInfo &STI);
664 
665 } // namespace DepCtr
666 
667 namespace Exp {
668 
669 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
670 
671 LLVM_READONLY
672 unsigned getTgtId(const StringRef Name);
673 
674 LLVM_READNONE
675 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
676 
677 } // namespace Exp
678 
679 namespace MTBUFFormat {
680 
681 LLVM_READNONE
682 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
683 
684 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
685 
686 int64_t getDfmt(const StringRef Name);
687 
688 StringRef getDfmtName(unsigned Id);
689 
690 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
691 
692 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
693 
694 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
695 
696 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
697 
698 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
699 
700 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
701 
702 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
703 
704 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
705                              const MCSubtargetInfo &STI);
706 
707 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
708 
709 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
710 
711 } // namespace MTBUFFormat
712 
713 namespace SendMsg {
714 
715 LLVM_READONLY
716 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
717 
718 LLVM_READONLY
719 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
720 
721 LLVM_READNONE
722 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
723 
724 LLVM_READNONE
725 StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
726 
727 LLVM_READNONE
728 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
729 
730 LLVM_READNONE
731 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
732                   bool Strict = true);
733 
734 LLVM_READNONE
735 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
736                       const MCSubtargetInfo &STI, bool Strict = true);
737 
738 LLVM_READNONE
739 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
740 
741 LLVM_READNONE
742 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
743 
744 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
745                uint16_t &StreamId, const MCSubtargetInfo &STI);
746 
747 LLVM_READNONE
748 uint64_t encodeMsg(uint64_t MsgId,
749                    uint64_t OpId,
750                    uint64_t StreamId);
751 
752 } // namespace SendMsg
753 
754 
755 unsigned getInitialPSInputAddr(const Function &F);
756 
757 bool getHasColorExport(const Function &F);
758 
759 bool getHasDepthExport(const Function &F);
760 
761 LLVM_READNONE
762 bool isShader(CallingConv::ID CC);
763 
764 LLVM_READNONE
765 bool isGraphics(CallingConv::ID CC);
766 
767 LLVM_READNONE
768 bool isCompute(CallingConv::ID CC);
769 
770 LLVM_READNONE
771 bool isEntryFunctionCC(CallingConv::ID CC);
772 
773 // These functions are considered entrypoints into the current module, i.e. they
774 // are allowed to be called from outside the current module. This is different
775 // from isEntryFunctionCC, which is only true for functions that are entered by
776 // the hardware. Module entry points include all entry functions but also
777 // include functions that can be called from other functions inside or outside
778 // the current module. Module entry functions are allowed to allocate LDS.
779 LLVM_READNONE
780 bool isModuleEntryFunctionCC(CallingConv::ID CC);
781 
782 bool isKernelCC(const Function *Func);
783 
784 // FIXME: Remove this when calling conventions cleaned up
785 LLVM_READNONE
786 inline bool isKernel(CallingConv::ID CC) {
787   switch (CC) {
788   case CallingConv::AMDGPU_KERNEL:
789   case CallingConv::SPIR_KERNEL:
790     return true;
791   default:
792     return false;
793   }
794 }
795 
796 bool hasXNACK(const MCSubtargetInfo &STI);
797 bool hasSRAMECC(const MCSubtargetInfo &STI);
798 bool hasMIMG_R128(const MCSubtargetInfo &STI);
799 bool hasGFX10A16(const MCSubtargetInfo &STI);
800 bool hasG16(const MCSubtargetInfo &STI);
801 bool hasPackedD16(const MCSubtargetInfo &STI);
802 
803 bool isSI(const MCSubtargetInfo &STI);
804 bool isCI(const MCSubtargetInfo &STI);
805 bool isVI(const MCSubtargetInfo &STI);
806 bool isGFX9(const MCSubtargetInfo &STI);
807 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
808 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
809 bool isGFX8Plus(const MCSubtargetInfo &STI);
810 bool isGFX9Plus(const MCSubtargetInfo &STI);
811 bool isGFX10(const MCSubtargetInfo &STI);
812 bool isGFX10Plus(const MCSubtargetInfo &STI);
813 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
814 bool isGFX10Before1030(const MCSubtargetInfo &STI);
815 bool isGFX11(const MCSubtargetInfo &STI);
816 bool isGFX11Plus(const MCSubtargetInfo &STI);
817 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
818 bool isGCN3Encoding(const MCSubtargetInfo &STI);
819 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
820 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
821 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
822 bool isGFX90A(const MCSubtargetInfo &STI);
823 bool isGFX940(const MCSubtargetInfo &STI);
824 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
825 bool hasMAIInsts(const MCSubtargetInfo &STI);
826 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
827 
828 /// Is Reg - scalar register
829 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
830 
831 /// If \p Reg is a pseudo reg, return the correct hardware register given
832 /// \p STI otherwise return \p Reg.
833 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
834 
835 /// Convert hardware register \p Reg to a pseudo register
836 LLVM_READNONE
837 unsigned mc2PseudoReg(unsigned Reg);
838 
839 /// Can this operand also contain immediate values?
840 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
841 
842 /// Is this floating-point operand?
843 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
844 
845 /// Does this operand support only inlinable literals?
846 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
847 
848 /// Get the size in bits of a register from the register class \p RC.
849 unsigned getRegBitWidth(unsigned RCID);
850 
851 /// Get the size in bits of a register from the register class \p RC.
852 unsigned getRegBitWidth(const MCRegisterClass &RC);
853 
854 /// Get size of register operand
855 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
856                            unsigned OpNo);
857 
858 LLVM_READNONE
859 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
860   switch (OpInfo.OperandType) {
861   case AMDGPU::OPERAND_REG_IMM_INT32:
862   case AMDGPU::OPERAND_REG_IMM_FP32:
863   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
864   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
865   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
866   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
867   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
868   case AMDGPU::OPERAND_REG_IMM_V2INT32:
869   case AMDGPU::OPERAND_REG_IMM_V2FP32:
870   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
871   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
872   case AMDGPU::OPERAND_KIMM32:
873   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
874     return 4;
875 
876   case AMDGPU::OPERAND_REG_IMM_INT64:
877   case AMDGPU::OPERAND_REG_IMM_FP64:
878   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
879   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
880   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
881     return 8;
882 
883   case AMDGPU::OPERAND_REG_IMM_INT16:
884   case AMDGPU::OPERAND_REG_IMM_FP16:
885   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
886   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
887   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
888   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
889   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
890   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
891   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
892   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
893   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
894   case AMDGPU::OPERAND_REG_IMM_V2INT16:
895   case AMDGPU::OPERAND_REG_IMM_V2FP16:
896     return 2;
897 
898   default:
899     llvm_unreachable("unhandled operand type");
900   }
901 }
902 
903 LLVM_READNONE
904 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
905   return getOperandSize(Desc.OpInfo[OpNo]);
906 }
907 
/// Check whether \p Literal is an inlinable integer, i.e. lies in the closed
/// range [-16, 64]. The inlinable values intended for floating point are not
/// considered here.
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  const int64_t LowestInlinable = -16;
  const int64_t HighestInlinable = 64;
  // Reject anything outside the closed range; everything else is inlinable.
  return !(Literal < LowestInlinable || Literal > HighestInlinable);
}
914 
/// Is this 64-bit literal inlinable?
/// NOTE(review): \p HasInv2Pi presumably tells whether the subtarget accepts
/// 1/(2*pi) as an inline constant - confirm against the definition.
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// Is this 32-bit literal inlinable? Same parameters as isInlinableLiteral64.
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// Is this 16-bit literal inlinable? Same parameters as isInlinableLiteral64.
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

/// NOTE(review): presumably checks whether \p Literal, read as a pair of
/// 16-bit values packed into 32 bits, is inlinable - confirm against the
/// definition.
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// NOTE(review): presumably the integer-only counterpart of
/// isInlinableLiteralV216 - confirm against the definition.
LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

/// NOTE(review): the exact meaning of "foldable" for a V216 literal is not
/// visible in this header - see the definition.
LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
933 
/// NOTE(review): presumably \returns true if the function argument \p Arg is
/// passed in a scalar register (SGPR) - confirm against the definition.
bool isArgPassedInSGPR(const Argument *Arg);
935 
/// NOTE(review): presumably \returns true if \p EncodedOffset (an already
/// encoded value, not a byte offset) fits the unsigned SMRD offset field on
/// subtarget \p ST - confirm against the definition.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// NOTE(review): presumably the signed-field counterpart of
/// isLegalSMRDEncodedUnsignedOffset; \p IsBuffer presumably distinguishes
/// S_BUFFER accesses - confirm against the definition.
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);
944 
/// Convert \p ByteOffset to dwords if the subtarget \p ST uses dword SMRD
/// immediate offsets.
/// NOTE(review): presumably \p ByteOffset is returned unchanged on other
/// subtargets - confirm against the definition.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
948 
/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10,
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);
955 
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);
960 
/// For the FLAT segment the offset must be positive;
/// the MSB is ignored and forced to zero.
///
/// \return The number of bits available for the offset field in flat
/// instructions on subtarget \p ST.
/// NOTE(review): \p Signed presumably selects whether the offset is treated as
/// signed - confirm against the definition.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
967 
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field of subtarget \p ST. \p ByteOffset should be the offset in
/// bytes and not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
972 
/// NOTE(review): presumably splits the MUBUF offset \p Imm into a part written
/// to \p SOffset and a part written to \p ImmOffset, and reports success
/// through the return value - confirm against the definition.
/// \p Alignment defaults to 4 when the caller does not supply one.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));
976 
977 LLVM_READNONE
978 inline bool isLegal64BitDPPControl(unsigned DC) {
979   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
980 }
981 
/// \returns true if the intrinsic identified by \p IntrID is a source of
/// divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
984 
985 // Track defaults for fields in the MODE register.
986 struct SIModeRegisterDefaults {
987   /// Floating point opcodes that support exception flag gathering quiet and
988   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
989   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
990   /// quieting.
991   bool IEEE : 1;
992 
993   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
994   /// clamp NaN to zero; otherwise, pass NaN through.
995   bool DX10Clamp : 1;
996 
997   /// If this is set, neither input or output denormals are flushed for most f32
998   /// instructions.
999   bool FP32InputDenormals : 1;
1000   bool FP32OutputDenormals : 1;
1001 
1002   /// If this is set, neither input or output denormals are flushed for both f64
1003   /// and f16/v2f16 instructions.
1004   bool FP64FP16InputDenormals : 1;
1005   bool FP64FP16OutputDenormals : 1;
1006 
1007   SIModeRegisterDefaults() :
1008     IEEE(true),
1009     DX10Clamp(true),
1010     FP32InputDenormals(true),
1011     FP32OutputDenormals(true),
1012     FP64FP16InputDenormals(true),
1013     FP64FP16OutputDenormals(true) {}
1014 
1015   SIModeRegisterDefaults(const Function &F);
1016 
1017   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
1018     SIModeRegisterDefaults Mode;
1019     Mode.IEEE = !AMDGPU::isShader(CC);
1020     return Mode;
1021   }
1022 
1023   bool operator ==(const SIModeRegisterDefaults Other) const {
1024     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
1025            FP32InputDenormals == Other.FP32InputDenormals &&
1026            FP32OutputDenormals == Other.FP32OutputDenormals &&
1027            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
1028            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
1029   }
1030 
1031   bool allFP32Denormals() const {
1032     return FP32InputDenormals && FP32OutputDenormals;
1033   }
1034 
1035   bool allFP64FP16Denormals() const {
1036     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
1037   }
1038 
1039   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1040   /// FP32 denormal mode.
1041   uint32_t fpDenormModeSPValue() const {
1042     if (FP32InputDenormals && FP32OutputDenormals)
1043       return FP_DENORM_FLUSH_NONE;
1044     if (FP32InputDenormals)
1045       return FP_DENORM_FLUSH_OUT;
1046     if (FP32OutputDenormals)
1047       return FP_DENORM_FLUSH_IN;
1048     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1049   }
1050 
1051   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1052   /// FP64/FP16 denormal mode.
1053   uint32_t fpDenormModeDPValue() const {
1054     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
1055       return FP_DENORM_FLUSH_NONE;
1056     if (FP64FP16InputDenormals)
1057       return FP_DENORM_FLUSH_OUT;
1058     if (FP64FP16OutputDenormals)
1059       return FP_DENORM_FLUSH_IN;
1060     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1061   }
1062 
1063   /// Returns true if a flag is compatible if it's enabled in the callee, but
1064   /// disabled in the caller.
1065   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1066     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1067   }
1068 
1069   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1070   // be able to override.
1071   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1072     if (DX10Clamp != CalleeMode.DX10Clamp)
1073       return false;
1074     if (IEEE != CalleeMode.IEEE)
1075       return false;
1076 
1077     // Allow inlining denormals enabled into denormals flushed functions.
1078     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1079            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1080            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1081            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1082   }
1083 };
1084 
1085 } // end namespace AMDGPU
1086 
/// Stream insertion operator for AMDGPU::IsaInfo::TargetIDSetting values.
/// NOTE(review): the printed representation is determined by the definition,
/// which is not visible in this header.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1089 
1090 } // end namespace llvm
1091 
1092 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1093