1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15 
16 struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
20 struct Align;
21 class Argument;
22 class Function;
23 class GCNSubtarget;
24 class GlobalValue;
25 class MCRegisterClass;
26 class MCRegisterInfo;
27 class MCSubtargetInfo;
28 class StringRef;
29 class Triple;
30 
31 namespace amdhsa {
32 struct kernel_descriptor_t;
33 }
34 
35 namespace AMDGPU {
36 
37 struct IsaVersion;
38 
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 4,
48 /// false otherwise.
49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 3 or 4,
51 /// false otherwise.
52 bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
53 
/// Describes one entry of the GCN buffer-format table. Entries are looked up
/// either by the combined \c Format value or by the
/// (BitsPerComp, NumComponents, NumFormat) triple — see the
/// getGcnBufferFormatInfo() overloads below.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined buffer-format encoding (lookup key).
  unsigned BitsPerComp;   // Bits per component (lookup key).
  unsigned NumComponents; // Number of components (lookup key).
  unsigned NumFormat;     // Numeric-format field of the encoding.
  unsigned DataFormat;    // Data-format field of the encoding.
};
61 
62 #define GET_MIMGBaseOpcode_DECL
63 #define GET_MIMGDim_DECL
64 #define GET_MIMGEncoding_DECL
65 #define GET_MIMGLZMapping_DECL
66 #define GET_MIMGMIPMapping_DECL
67 #include "AMDGPUGenSearchableTables.inc"
68 
69 namespace IsaInfo {
70 
// SGPR budget constants used by the ISA info queries below.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16 // presumably SGPRs reserved for the trap handler — confirm at use sites
};
77 
/// State of a single target-ID feature (e.g. xnack or sramecc) as tracked by
/// AMDGPUTargetID below: either the feature is unsupported on the subtarget,
/// or it is in one of the "any"/"off"/"on" settings.
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
84 
85 class AMDGPUTargetID {
86 private:
87   const MCSubtargetInfo &STI;
88   TargetIDSetting XnackSetting;
89   TargetIDSetting SramEccSetting;
90 
91 public:
92   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
93   ~AMDGPUTargetID() = default;
94 
95   /// \return True if the current xnack setting is not "Unsupported".
96   bool isXnackSupported() const {
97     return XnackSetting != TargetIDSetting::Unsupported;
98   }
99 
100   /// \returns True if the current xnack setting is "On" or "Any".
101   bool isXnackOnOrAny() const {
102     return XnackSetting == TargetIDSetting::On ||
103         XnackSetting == TargetIDSetting::Any;
104   }
105 
106   /// \returns True if current xnack setting is "On" or "Off",
107   /// false otherwise.
108   bool isXnackOnOrOff() const {
109     return getXnackSetting() == TargetIDSetting::On ||
110         getXnackSetting() == TargetIDSetting::Off;
111   }
112 
113   /// \returns The current xnack TargetIDSetting, possible options are
114   /// "Unsupported", "Any", "Off", and "On".
115   TargetIDSetting getXnackSetting() const {
116     return XnackSetting;
117   }
118 
119   /// Sets xnack setting to \p NewXnackSetting.
120   void setXnackSetting(TargetIDSetting NewXnackSetting) {
121     XnackSetting = NewXnackSetting;
122   }
123 
124   /// \return True if the current sramecc setting is not "Unsupported".
125   bool isSramEccSupported() const {
126     return SramEccSetting != TargetIDSetting::Unsupported;
127   }
128 
129   /// \returns True if the current sramecc setting is "On" or "Any".
130   bool isSramEccOnOrAny() const {
131   return SramEccSetting == TargetIDSetting::On ||
132       SramEccSetting == TargetIDSetting::Any;
133   }
134 
135   /// \returns True if current sramecc setting is "On" or "Off",
136   /// false otherwise.
137   bool isSramEccOnOrOff() const {
138     return getSramEccSetting() == TargetIDSetting::On ||
139         getSramEccSetting() == TargetIDSetting::Off;
140   }
141 
142   /// \returns The current sramecc TargetIDSetting, possible options are
143   /// "Unsupported", "Any", "Off", and "On".
144   TargetIDSetting getSramEccSetting() const {
145     return SramEccSetting;
146   }
147 
148   /// Sets sramecc setting to \p NewSramEccSetting.
149   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
150     SramEccSetting = NewSramEccSetting;
151   }
152 
153   void setTargetIDFromFeaturesString(StringRef FS);
154   void setTargetIDFromTargetIDStream(StringRef TargetID);
155 
156   /// \returns String representation of an object.
157   std::string toString() const;
158 };
159 
160 /// \returns Wavefront size for given subtarget \p STI.
161 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
162 
163 /// \returns Local memory size in bytes for given subtarget \p STI.
164 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
165 
166 /// \returns Number of execution units per compute unit for given subtarget \p
167 /// STI.
168 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
169 
170 /// \returns Maximum number of work groups per compute unit for given subtarget
171 /// \p STI and limited by given \p FlatWorkGroupSize.
172 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
173                                unsigned FlatWorkGroupSize);
174 
175 /// \returns Minimum number of waves per execution unit for given subtarget \p
176 /// STI.
177 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
178 
179 /// \returns Maximum number of waves per execution unit for given subtarget \p
180 /// STI without any kind of limitation.
181 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
182 
183 /// \returns Number of waves per execution unit required to support the given \p
184 /// FlatWorkGroupSize.
185 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
186                                    unsigned FlatWorkGroupSize);
187 
188 /// \returns Minimum flat work group size for given subtarget \p STI.
189 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
190 
191 /// \returns Maximum flat work group size for given subtarget \p STI.
192 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
193 
194 /// \returns Number of waves per work group for given subtarget \p STI and
195 /// \p FlatWorkGroupSize.
196 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
197                               unsigned FlatWorkGroupSize);
198 
199 /// \returns SGPR allocation granularity for given subtarget \p STI.
200 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
201 
202 /// \returns SGPR encoding granularity for given subtarget \p STI.
203 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
204 
205 /// \returns Total number of SGPRs for given subtarget \p STI.
206 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
207 
208 /// \returns Addressable number of SGPRs for given subtarget \p STI.
209 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
210 
211 /// \returns Minimum number of SGPRs that meets the given number of waves per
212 /// execution unit requirement for given subtarget \p STI.
213 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
214 
215 /// \returns Maximum number of SGPRs that meets the given number of waves per
216 /// execution unit requirement for given subtarget \p STI.
217 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
218                         bool Addressable);
219 
220 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
221 /// STI when the given special registers are used.
222 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
223                           bool FlatScrUsed, bool XNACKUsed);
224 
225 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
226 /// STI when the given special registers are used. XNACK is inferred from
227 /// \p STI.
228 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
229                           bool FlatScrUsed);
230 
231 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
232 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
233 /// register counts.
234 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
235 
236 /// \returns VGPR allocation granularity for given subtarget \p STI.
237 ///
238 /// For subtargets which support it, \p EnableWavefrontSize32 should match
239 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
240 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
241                              Optional<bool> EnableWavefrontSize32 = None);
242 
243 /// \returns VGPR encoding granularity for given subtarget \p STI.
244 ///
245 /// For subtargets which support it, \p EnableWavefrontSize32 should match
246 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
247 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
248                                 Optional<bool> EnableWavefrontSize32 = None);
249 
250 /// \returns Total number of VGPRs for given subtarget \p STI.
251 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
252 
253 /// \returns Addressable number of VGPRs for given subtarget \p STI.
254 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
255 
256 /// \returns Minimum number of VGPRs that meets given number of waves per
257 /// execution unit requirement for given subtarget \p STI.
258 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
259 
260 /// \returns Maximum number of VGPRs that meets given number of waves per
261 /// execution unit requirement for given subtarget \p STI.
262 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
263 
264 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
265 /// \p NumVGPRs are used.
266 ///
267 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
268 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
269 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
270                           Optional<bool> EnableWavefrontSize32 = None);
271 
272 } // end namespace IsaInfo
273 
274 LLVM_READONLY
275 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
276 
277 LLVM_READONLY
278 int getSOPPWithRelaxation(uint16_t Opcode);
279 
/// Properties of a MIMG base opcode; rows come from the generated searchable
/// tables (GET_MIMGBaseOpcode_DECL above). Flag names mirror the instruction
/// properties defined in the TableGen descriptions — consult the .td files
/// for the precise meaning of each flag.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode; // Lookup key (see getMIMGBaseOpcodeInfo).
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};
297 
298 LLVM_READONLY
299 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
300 
301 LLVM_READONLY
302 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
303 
/// Describes a MIMG dimension. Entries can be looked up by dimension enum,
/// by hardware encoding, or by assembler suffix (see the getMIMGDimInfo*
/// helpers below).
struct MIMGDimInfo {
  MIMGDim Dim;           // Dimension enum value (lookup key).
  uint8_t NumCoords;     // Number of coordinate components.
  uint8_t NumGradients;  // Number of gradient components.
  bool MSAA;             // Multi-sampled dimension.
  bool DA;               // presumably the "declare array" flag — confirm in .td
  uint8_t Encoding;      // Hardware encoding (key for getMIMGDimInfoByEncoding).
  const char *AsmSuffix; // Assembler suffix (key for getMIMGDimInfoByAsmSuffix).
};
313 
314 LLVM_READONLY
315 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
316 
317 LLVM_READONLY
318 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
319 
320 LLVM_READONLY
321 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
322 
/// Pairs a MIMG base opcode \c L with its \c LZ counterpart; queried by the
/// L opcode via getMIMGLZMappingInfo() (presumably mapping explicit-LOD forms
/// to their zero-LOD equivalents — confirm against the generated table).
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;  // Lookup key.
  MIMGBaseOpcode LZ; // Mapped opcode.
};
327 
/// Pairs a MIMG base opcode \c MIP with its \c NONMIP counterpart; queried by
/// the MIP opcode via getMIMGMIPMappingInfo().
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;    // Lookup key.
  MIMGBaseOpcode NONMIP; // Mapped opcode.
};
332 
/// Pairs a MIMG base opcode \c G with its \c G16 counterpart; queried by the
/// G opcode via getMIMGG16MappingInfo().
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;   // Lookup key.
  MIMGBaseOpcode G16; // Mapped opcode.
};
337 
338 LLVM_READONLY
339 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
340 
341 LLVM_READONLY
342 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
343 
344 LLVM_READONLY
345 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
346 
347 LLVM_READONLY
348 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
349                   unsigned VDataDwords, unsigned VAddrDwords);
350 
351 LLVM_READONLY
352 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
353 
354 LLVM_READONLY
355 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
356                            const MIMGDimInfo *Dim, bool IsA16,
357                            bool IsG16Supported);
358 
/// Per-instruction MIMG table entry. Looked up by \c Opcode in getMIMGInfo(),
/// and by the (BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords) tuple in
/// getMIMGOpcode().
struct MIMGInfo {
  uint16_t Opcode;      // Target instruction opcode (lookup key).
  uint16_t BaseOpcode;  // Associated MIMG base opcode.
  uint8_t MIMGEncoding; // Encoding family.
  uint8_t VDataDwords;  // vdata size in dwords.
  uint8_t VAddrDwords;  // vaddr size in dwords.
};
366 
367 LLVM_READONLY
368 const MIMGInfo *getMIMGInfo(unsigned Opc);
369 
370 LLVM_READONLY
371 int getMTBUFBaseOpcode(unsigned Opc);
372 
373 LLVM_READONLY
374 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
375 
376 LLVM_READONLY
377 int getMTBUFElements(unsigned Opc);
378 
379 LLVM_READONLY
380 bool getMTBUFHasVAddr(unsigned Opc);
381 
382 LLVM_READONLY
383 bool getMTBUFHasSrsrc(unsigned Opc);
384 
385 LLVM_READONLY
386 bool getMTBUFHasSoffset(unsigned Opc);
387 
388 LLVM_READONLY
389 int getMUBUFBaseOpcode(unsigned Opc);
390 
391 LLVM_READONLY
392 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
393 
394 LLVM_READONLY
395 int getMUBUFElements(unsigned Opc);
396 
397 LLVM_READONLY
398 bool getMUBUFHasVAddr(unsigned Opc);
399 
400 LLVM_READONLY
401 bool getMUBUFHasSrsrc(unsigned Opc);
402 
403 LLVM_READONLY
404 bool getMUBUFHasSoffset(unsigned Opc);
405 
406 LLVM_READONLY
407 bool getMUBUFIsBufferInv(unsigned Opc);
408 
409 LLVM_READONLY
410 bool getSMEMIsBuffer(unsigned Opc);
411 
412 LLVM_READONLY
413 bool getVOP1IsSingle(unsigned Opc);
414 
415 LLVM_READONLY
416 bool getVOP2IsSingle(unsigned Opc);
417 
418 LLVM_READONLY
419 bool getVOP3IsSingle(unsigned Opc);
420 
421 LLVM_READONLY
422 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
423                                                   uint8_t NumComponents,
424                                                   uint8_t NumFormat,
425                                                   const MCSubtargetInfo &STI);
426 LLVM_READONLY
427 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
428                                                   const MCSubtargetInfo &STI);
429 
430 LLVM_READONLY
431 int getMCOpcode(uint16_t Opcode, unsigned Gen);
432 
433 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
434                                const MCSubtargetInfo *STI);
435 
436 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
437     const MCSubtargetInfo *STI);
438 
439 bool isGroupSegment(const GlobalValue *GV);
440 bool isGlobalSegment(const GlobalValue *GV);
441 bool isReadOnlySegment(const GlobalValue *GV);
442 
443 /// \returns True if constants should be emitted to .text section for given
444 /// target triple \p TT, false otherwise.
445 bool shouldEmitConstantsToTextSection(const Triple &TT);
446 
447 /// \returns Integer value requested using \p F's \p Name attribute.
448 ///
449 /// \returns \p Default if attribute is not present.
450 ///
451 /// \returns \p Default and emits error if requested value cannot be converted
452 /// to integer.
453 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
454 
455 /// \returns A pair of integer values requested using \p F's \p Name attribute
456 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
457 /// is false).
458 ///
459 /// \returns \p Default if attribute is not present.
460 ///
461 /// \returns \p Default and emits error if one of the requested values cannot be
462 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
463 /// not present.
464 std::pair<int, int> getIntegerPairAttribute(const Function &F,
465                                             StringRef Name,
466                                             std::pair<int, int> Default,
467                                             bool OnlyFirstRequired = false);
468 
469 /// Represents the counter values to wait for in an s_waitcnt instruction.
470 ///
471 /// Large values (including the maximum possible integer) can be used to
472 /// represent "don't care" waits.
473 struct Waitcnt {
474   unsigned VmCnt = ~0u;
475   unsigned ExpCnt = ~0u;
476   unsigned LgkmCnt = ~0u;
477   unsigned VsCnt = ~0u;
478 
479   Waitcnt() {}
480   Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
481       : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
482 
483   static Waitcnt allZero(bool HasVscnt) {
484     return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
485   }
486   static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
487 
488   bool hasWait() const {
489     return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
490   }
491 
492   bool hasWaitExceptVsCnt() const {
493     return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
494   }
495 
496   bool hasWaitVsCnt() const {
497     return VsCnt != ~0u;
498   }
499 
500   bool dominates(const Waitcnt &Other) const {
501     return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
502            LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
503   }
504 
505   Waitcnt combined(const Waitcnt &Other) const {
506     return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
507                    std::min(LgkmCnt, Other.LgkmCnt),
508                    std::min(VsCnt, Other.VsCnt));
509   }
510 };
511 
512 /// \returns Vmcnt bit mask for given isa \p Version.
513 unsigned getVmcntBitMask(const IsaVersion &Version);
514 
515 /// \returns Expcnt bit mask for given isa \p Version.
516 unsigned getExpcntBitMask(const IsaVersion &Version);
517 
518 /// \returns Lgkmcnt bit mask for given isa \p Version.
519 unsigned getLgkmcntBitMask(const IsaVersion &Version);
520 
521 /// \returns Waitcnt bit mask for given isa \p Version.
522 unsigned getWaitcntBitMask(const IsaVersion &Version);
523 
524 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
525 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
526 
527 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
528 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
529 
530 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
531 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
532 
533 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
534 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
535 /// \p Lgkmcnt respectively.
536 ///
537 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
538 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
539 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
540 ///     \p Expcnt = \p Waitcnt[6:4]
541 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
542 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
543 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
544                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
545 
546 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
547 
548 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
549 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
550                      unsigned Vmcnt);
551 
552 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
553 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
554                       unsigned Expcnt);
555 
556 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
557 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
558                        unsigned Lgkmcnt);
559 
560 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
561 /// \p Version.
562 ///
563 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
564 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
565 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
566 ///     Waitcnt[6:4]   = \p Expcnt
567 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
568 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
569 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
570 ///
571 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
572 /// isa \p Version.
573 unsigned encodeWaitcnt(const IsaVersion &Version,
574                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
575 
576 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
577 
578 namespace Hwreg {
579 
580 LLVM_READONLY
581 int64_t getHwregId(const StringRef Name);
582 
583 LLVM_READNONE
584 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
585 
586 LLVM_READNONE
587 bool isValidHwreg(int64_t Id);
588 
589 LLVM_READNONE
590 bool isValidHwregOffset(int64_t Offset);
591 
592 LLVM_READNONE
593 bool isValidHwregWidth(int64_t Width);
594 
595 LLVM_READNONE
596 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
597 
598 LLVM_READNONE
599 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
600 
601 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
602 
603 } // namespace Hwreg
604 
605 namespace Exp {
606 
607 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
608 
609 LLVM_READONLY
610 unsigned getTgtId(const StringRef Name);
611 
612 LLVM_READNONE
613 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
614 
615 } // namespace Exp
616 
617 namespace MTBUFFormat {
618 
619 LLVM_READNONE
620 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
621 
622 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
623 
624 int64_t getDfmt(const StringRef Name);
625 
626 StringRef getDfmtName(unsigned Id);
627 
628 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
629 
630 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
631 
632 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
633 
634 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
635 
636 int64_t getUnifiedFormat(const StringRef Name);
637 
638 StringRef getUnifiedFormatName(unsigned Id);
639 
640 bool isValidUnifiedFormat(unsigned Val);
641 
642 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
643 
644 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
645 
646 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
647 
648 } // namespace MTBUFFormat
649 
650 namespace SendMsg {
651 
652 LLVM_READONLY
653 int64_t getMsgId(const StringRef Name);
654 
655 LLVM_READONLY
656 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
657 
658 LLVM_READNONE
659 StringRef getMsgName(int64_t MsgId);
660 
661 LLVM_READNONE
662 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
663 
664 LLVM_READNONE
665 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
666 
667 LLVM_READNONE
668 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
669                   bool Strict = true);
670 
671 LLVM_READNONE
672 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
673                       const MCSubtargetInfo &STI, bool Strict = true);
674 
675 LLVM_READNONE
676 bool msgRequiresOp(int64_t MsgId);
677 
678 LLVM_READNONE
679 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
680 
681 void decodeMsg(unsigned Val,
682                uint16_t &MsgId,
683                uint16_t &OpId,
684                uint16_t &StreamId);
685 
686 LLVM_READNONE
687 uint64_t encodeMsg(uint64_t MsgId,
688                    uint64_t OpId,
689                    uint64_t StreamId);
690 
691 } // namespace SendMsg
692 
693 
694 unsigned getInitialPSInputAddr(const Function &F);
695 
696 bool getHasColorExport(const Function &F);
697 
698 bool getHasDepthExport(const Function &F);
699 
700 LLVM_READNONE
701 bool isShader(CallingConv::ID CC);
702 
703 LLVM_READNONE
704 bool isGraphics(CallingConv::ID CC);
705 
706 LLVM_READNONE
707 bool isCompute(CallingConv::ID CC);
708 
709 LLVM_READNONE
710 bool isEntryFunctionCC(CallingConv::ID CC);
711 
712 // These functions are considered entrypoints into the current module, i.e. they
713 // are allowed to be called from outside the current module. This is different
714 // from isEntryFunctionCC, which is only true for functions that are entered by
715 // the hardware. Module entry points include all entry functions but also
716 // include functions that can be called from other functions inside or outside
717 // the current module. Module entry functions are allowed to allocate LDS.
718 LLVM_READNONE
719 bool isModuleEntryFunctionCC(CallingConv::ID CC);
720 
721 // FIXME: Remove this when calling conventions cleaned up
722 LLVM_READNONE
723 inline bool isKernel(CallingConv::ID CC) {
724   switch (CC) {
725   case CallingConv::AMDGPU_KERNEL:
726   case CallingConv::SPIR_KERNEL:
727     return true;
728   default:
729     return false;
730   }
731 }
732 
733 bool hasXNACK(const MCSubtargetInfo &STI);
734 bool hasSRAMECC(const MCSubtargetInfo &STI);
735 bool hasMIMG_R128(const MCSubtargetInfo &STI);
736 bool hasGFX10A16(const MCSubtargetInfo &STI);
737 bool hasG16(const MCSubtargetInfo &STI);
738 bool hasPackedD16(const MCSubtargetInfo &STI);
739 
740 bool isSI(const MCSubtargetInfo &STI);
741 bool isCI(const MCSubtargetInfo &STI);
742 bool isVI(const MCSubtargetInfo &STI);
743 bool isGFX9(const MCSubtargetInfo &STI);
744 bool isGFX9Plus(const MCSubtargetInfo &STI);
745 bool isGFX10(const MCSubtargetInfo &STI);
746 bool isGFX10Plus(const MCSubtargetInfo &STI);
747 bool isGCN3Encoding(const MCSubtargetInfo &STI);
748 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
749 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
750 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
751 bool isGFX90A(const MCSubtargetInfo &STI);
752 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
753 
754 /// Is Reg - scalar register
755 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
756 
757 /// Is there any intersection between registers
758 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
759 
760 /// If \p Reg is a pseudo reg, return the correct hardware register given
761 /// \p STI otherwise return \p Reg.
762 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
763 
764 /// Convert hardware register \p Reg to a pseudo register
765 LLVM_READNONE
766 unsigned mc2PseudoReg(unsigned Reg);
767 
768 /// Can this operand also contain immediate values?
769 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
770 
771 /// Is this floating-point operand?
772 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
773 
774 /// Does this operand support only inlinable literals?
775 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
776 
777 /// Get the size in bits of a register from the register class \p RC.
778 unsigned getRegBitWidth(unsigned RCID);
779 
780 /// Get the size in bits of a register from the register class \p RC.
781 unsigned getRegBitWidth(const MCRegisterClass &RC);
782 
783 /// Get size of register operand
784 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
785                            unsigned OpNo);
786 
787 LLVM_READNONE
788 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
789   switch (OpInfo.OperandType) {
790   case AMDGPU::OPERAND_REG_IMM_INT32:
791   case AMDGPU::OPERAND_REG_IMM_FP32:
792   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
793   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
794   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
795   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
796   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
797   case AMDGPU::OPERAND_REG_IMM_V2INT32:
798   case AMDGPU::OPERAND_REG_IMM_V2FP32:
799   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
800   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
801   case AMDGPU::OPERAND_KIMM32:
802   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
803     return 4;
804 
805   case AMDGPU::OPERAND_REG_IMM_INT64:
806   case AMDGPU::OPERAND_REG_IMM_FP64:
807   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
808   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
809   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
810     return 8;
811 
812   case AMDGPU::OPERAND_REG_IMM_INT16:
813   case AMDGPU::OPERAND_REG_IMM_FP16:
814   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
815   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
816   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
817   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
818   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
819   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
820   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
821   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
822   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
823   case AMDGPU::OPERAND_REG_IMM_V2INT16:
824   case AMDGPU::OPERAND_REG_IMM_V2FP16:
825     return 2;
826 
827   default:
828     llvm_unreachable("unhandled operand type");
829   }
830 }
831 
832 LLVM_READNONE
833 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
834   return getOperandSize(Desc.OpInfo[OpNo]);
835 }
836 
837 /// Is this literal inlinable, and not one of the values intended for floating
838 /// point values.
839 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Integer inline constants cover the closed range [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
843 
844 /// Is this literal inlinable
845 LLVM_READNONE
846 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
847 
848 LLVM_READNONE
849 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
850 
851 LLVM_READNONE
852 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
853 
854 LLVM_READNONE
855 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
856 
857 LLVM_READNONE
858 bool isInlinableIntLiteralV216(int32_t Literal);
859 
860 LLVM_READNONE
861 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
862 
863 bool isArgPassedInSGPR(const Argument *Arg);
864 
865 LLVM_READONLY
866 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
867                                       int64_t EncodedOffset);
868 
869 LLVM_READONLY
870 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
871                                     int64_t EncodedOffset,
872                                     bool IsBuffer);
873 
874 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
875 /// offsets.
876 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
877 
878 /// \returns The encoding that will be used for \p ByteOffset in the
879 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
880 /// S_LOAD instructions have a signed offset, on other subtargets it is
881 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
882 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
883                                        int64_t ByteOffset, bool IsBuffer);
884 
885 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD
886 /// instruction. This is only useful on CI.s
887 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
888                                                 int64_t ByteOffset);
889 
890 /// For FLAT segment the offset must be positive;
891 /// MSB is ignored and forced to zero.
892 ///
893 /// \return The number of bits available for the offset field in flat
894 /// instructions.
895 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
896 
897 /// \returns true if this offset is small enough to fit in the SMRD
898 /// offset field.  \p ByteOffset should be the offset in bytes and
899 /// not the encoded offset.
900 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
901 
902 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
903                       const GCNSubtarget *Subtarget,
904                       Align Alignment = Align(4));
905 
906 LLVM_READNONE
907 inline bool isLegal64BitDPPControl(unsigned DC) {
908   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
909 }
910 
911 /// \returns true if the intrinsic is divergent
912 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
913 
914 // Track defaults for fields in the MODE registser.
915 struct SIModeRegisterDefaults {
916   /// Floating point opcodes that support exception flag gathering quiet and
917   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
918   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
919   /// quieting.
920   bool IEEE : 1;
921 
922   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
923   /// clamp NaN to zero; otherwise, pass NaN through.
924   bool DX10Clamp : 1;
925 
926   /// If this is set, neither input or output denormals are flushed for most f32
927   /// instructions.
928   bool FP32InputDenormals : 1;
929   bool FP32OutputDenormals : 1;
930 
931   /// If this is set, neither input or output denormals are flushed for both f64
932   /// and f16/v2f16 instructions.
933   bool FP64FP16InputDenormals : 1;
934   bool FP64FP16OutputDenormals : 1;
935 
936   SIModeRegisterDefaults() :
937     IEEE(true),
938     DX10Clamp(true),
939     FP32InputDenormals(true),
940     FP32OutputDenormals(true),
941     FP64FP16InputDenormals(true),
942     FP64FP16OutputDenormals(true) {}
943 
944   SIModeRegisterDefaults(const Function &F);
945 
946   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
947     SIModeRegisterDefaults Mode;
948     Mode.IEEE = !AMDGPU::isShader(CC);
949     return Mode;
950   }
951 
952   bool operator ==(const SIModeRegisterDefaults Other) const {
953     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
954            FP32InputDenormals == Other.FP32InputDenormals &&
955            FP32OutputDenormals == Other.FP32OutputDenormals &&
956            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
957            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
958   }
959 
960   bool allFP32Denormals() const {
961     return FP32InputDenormals && FP32OutputDenormals;
962   }
963 
964   bool allFP64FP16Denormals() const {
965     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
966   }
967 
968   /// Get the encoding value for the FP_DENORM bits of the mode register for the
969   /// FP32 denormal mode.
970   uint32_t fpDenormModeSPValue() const {
971     if (FP32InputDenormals && FP32OutputDenormals)
972       return FP_DENORM_FLUSH_NONE;
973     if (FP32InputDenormals)
974       return FP_DENORM_FLUSH_OUT;
975     if (FP32OutputDenormals)
976       return FP_DENORM_FLUSH_IN;
977     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
978   }
979 
980   /// Get the encoding value for the FP_DENORM bits of the mode register for the
981   /// FP64/FP16 denormal mode.
982   uint32_t fpDenormModeDPValue() const {
983     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
984       return FP_DENORM_FLUSH_NONE;
985     if (FP64FP16InputDenormals)
986       return FP_DENORM_FLUSH_OUT;
987     if (FP64FP16OutputDenormals)
988       return FP_DENORM_FLUSH_IN;
989     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
990   }
991 
992   /// Returns true if a flag is compatible if it's enabled in the callee, but
993   /// disabled in the caller.
994   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
995     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
996   }
997 
998   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
999   // be able to override.
1000   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1001     if (DX10Clamp != CalleeMode.DX10Clamp)
1002       return false;
1003     if (IEEE != CalleeMode.IEEE)
1004       return false;
1005 
1006     // Allow inlining denormals enabled into denormals flushed functions.
1007     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1008            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1009            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1010            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1011   }
1012 };
1013 
1014 } // end namespace AMDGPU
1015 
1016 raw_ostream &operator<<(raw_ostream &OS,
1017                         const AMDGPU::IsaInfo::TargetIDSetting S);
1018 
1019 } // end namespace llvm
1020 
1021 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1022