//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
#include "SIDefines.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
15 
// Forward declaration; only used by reference in this header.
struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
// Forward declarations of types that this header only names in declarations.
struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;
30 
namespace amdhsa {
// Forward declaration; returned by value from
// getDefaultAmdhsaKernelDescriptor().
struct kernel_descriptor_t;
} // end namespace amdhsa
34 
35 namespace AMDGPU {
36 
// Forward declaration; passed by const reference to the waitcnt helpers.
struct IsaVersion;
38 
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 4,
48 /// false otherwise.
49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 5,
51 /// false otherwise.
52 bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
53 /// \returns True if HSA OS ABI Version identification is 3 and above,
54 /// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
56 
57 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
58 unsigned getHostcallImplicitArgPosition();
59 
60 /// \returns Code object version.
61 unsigned getAmdhsaCodeObjectVersion();
62 
/// Describes a GCN buffer format. Pointers to these records are returned by
/// the getGcnBufferFormatInfo() overloads.
///
/// NOTE(review): presumably instances come from TableGen-generated tables, in
/// which case the field order must not change -- confirm before reordering.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};
70 
71 #define GET_MIMGBaseOpcode_DECL
72 #define GET_MIMGDim_DECL
73 #define GET_MIMGEncoding_DECL
74 #define GET_MIMGLZMapping_DECL
75 #define GET_MIMGMIPMapping_DECL
76 #define GET_MIMGBiASMapping_DECL
77 #include "AMDGPUGenSearchableTables.inc"
78 
79 namespace IsaInfo {
80 
81 enum {
82   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
83   // doesn't spill SGPRs as much as when 80 is set.
84   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
85   TRAP_NUM_SGPRS = 16
86 };
87 
88 enum class TargetIDSetting {
89   Unsupported,
90   Any,
91   Off,
92   On
93 };
94 
95 class AMDGPUTargetID {
96 private:
97   const MCSubtargetInfo &STI;
98   TargetIDSetting XnackSetting;
99   TargetIDSetting SramEccSetting;
100 
101 public:
102   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
103   ~AMDGPUTargetID() = default;
104 
105   /// \return True if the current xnack setting is not "Unsupported".
106   bool isXnackSupported() const {
107     return XnackSetting != TargetIDSetting::Unsupported;
108   }
109 
110   /// \returns True if the current xnack setting is "On" or "Any".
111   bool isXnackOnOrAny() const {
112     return XnackSetting == TargetIDSetting::On ||
113         XnackSetting == TargetIDSetting::Any;
114   }
115 
116   /// \returns True if current xnack setting is "On" or "Off",
117   /// false otherwise.
118   bool isXnackOnOrOff() const {
119     return getXnackSetting() == TargetIDSetting::On ||
120         getXnackSetting() == TargetIDSetting::Off;
121   }
122 
123   /// \returns The current xnack TargetIDSetting, possible options are
124   /// "Unsupported", "Any", "Off", and "On".
125   TargetIDSetting getXnackSetting() const {
126     return XnackSetting;
127   }
128 
129   /// Sets xnack setting to \p NewXnackSetting.
130   void setXnackSetting(TargetIDSetting NewXnackSetting) {
131     XnackSetting = NewXnackSetting;
132   }
133 
134   /// \return True if the current sramecc setting is not "Unsupported".
135   bool isSramEccSupported() const {
136     return SramEccSetting != TargetIDSetting::Unsupported;
137   }
138 
139   /// \returns True if the current sramecc setting is "On" or "Any".
140   bool isSramEccOnOrAny() const {
141   return SramEccSetting == TargetIDSetting::On ||
142       SramEccSetting == TargetIDSetting::Any;
143   }
144 
145   /// \returns True if current sramecc setting is "On" or "Off",
146   /// false otherwise.
147   bool isSramEccOnOrOff() const {
148     return getSramEccSetting() == TargetIDSetting::On ||
149         getSramEccSetting() == TargetIDSetting::Off;
150   }
151 
152   /// \returns The current sramecc TargetIDSetting, possible options are
153   /// "Unsupported", "Any", "Off", and "On".
154   TargetIDSetting getSramEccSetting() const {
155     return SramEccSetting;
156   }
157 
158   /// Sets sramecc setting to \p NewSramEccSetting.
159   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
160     SramEccSetting = NewSramEccSetting;
161   }
162 
163   void setTargetIDFromFeaturesString(StringRef FS);
164   void setTargetIDFromTargetIDStream(StringRef TargetID);
165 
166   /// \returns String representation of an object.
167   std::string toString() const;
168 };
169 
170 /// \returns Wavefront size for given subtarget \p STI.
171 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
172 
173 /// \returns Local memory size in bytes for given subtarget \p STI.
174 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
175 
176 /// \returns Number of execution units per compute unit for given subtarget \p
177 /// STI.
178 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
179 
180 /// \returns Maximum number of work groups per compute unit for given subtarget
181 /// \p STI and limited by given \p FlatWorkGroupSize.
182 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
183                                unsigned FlatWorkGroupSize);
184 
185 /// \returns Minimum number of waves per execution unit for given subtarget \p
186 /// STI.
187 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
188 
189 /// \returns Maximum number of waves per execution unit for given subtarget \p
190 /// STI without any kind of limitation.
191 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
192 
193 /// \returns Number of waves per execution unit required to support the given \p
194 /// FlatWorkGroupSize.
195 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
196                                    unsigned FlatWorkGroupSize);
197 
198 /// \returns Minimum flat work group size for given subtarget \p STI.
199 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
200 
201 /// \returns Maximum flat work group size for given subtarget \p STI.
202 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
203 
204 /// \returns Number of waves per work group for given subtarget \p STI and
205 /// \p FlatWorkGroupSize.
206 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
207                               unsigned FlatWorkGroupSize);
208 
209 /// \returns SGPR allocation granularity for given subtarget \p STI.
210 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
211 
212 /// \returns SGPR encoding granularity for given subtarget \p STI.
213 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
214 
215 /// \returns Total number of SGPRs for given subtarget \p STI.
216 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
217 
218 /// \returns Addressable number of SGPRs for given subtarget \p STI.
219 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
220 
221 /// \returns Minimum number of SGPRs that meets the given number of waves per
222 /// execution unit requirement for given subtarget \p STI.
223 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
224 
225 /// \returns Maximum number of SGPRs that meets the given number of waves per
226 /// execution unit requirement for given subtarget \p STI.
227 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
228                         bool Addressable);
229 
230 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
231 /// STI when the given special registers are used.
232 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
233                           bool FlatScrUsed, bool XNACKUsed);
234 
235 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
236 /// STI when the given special registers are used. XNACK is inferred from
237 /// \p STI.
238 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
239                           bool FlatScrUsed);
240 
241 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
242 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
243 /// register counts.
244 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
245 
246 /// \returns VGPR allocation granularity for given subtarget \p STI.
247 ///
248 /// For subtargets which support it, \p EnableWavefrontSize32 should match
249 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
250 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
251                              Optional<bool> EnableWavefrontSize32 = None);
252 
253 /// \returns VGPR encoding granularity for given subtarget \p STI.
254 ///
255 /// For subtargets which support it, \p EnableWavefrontSize32 should match
256 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
257 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
258                                 Optional<bool> EnableWavefrontSize32 = None);
259 
260 /// \returns Total number of VGPRs for given subtarget \p STI.
261 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
262 
263 /// \returns Addressable number of VGPRs for given subtarget \p STI.
264 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
265 
266 /// \returns Minimum number of VGPRs that meets given number of waves per
267 /// execution unit requirement for given subtarget \p STI.
268 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
269 
270 /// \returns Maximum number of VGPRs that meets given number of waves per
271 /// execution unit requirement for given subtarget \p STI.
272 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
273 
274 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
275 /// \p NumVGPRs are used.
276 ///
277 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
278 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
279 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
280                           Optional<bool> EnableWavefrontSize32 = None);
281 
282 } // end namespace IsaInfo
283 
284 LLVM_READONLY
285 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
286 
287 LLVM_READONLY
288 int getSOPPWithRelaxation(uint16_t Opcode);
289 
290 struct MIMGBaseOpcodeInfo {
291   MIMGBaseOpcode BaseOpcode;
292   bool Store;
293   bool Atomic;
294   bool AtomicX2;
295   bool Sampler;
296   bool Gather4;
297 
298   uint8_t NumExtraArgs;
299   bool Gradients;
300   bool G16;
301   bool Coordinates;
302   bool LodOrClampOrMip;
303   bool HasD16;
304   bool MSAA;
305   bool BVH;
306 };
307 
308 LLVM_READONLY
309 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
310 
311 LLVM_READONLY
312 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
313 
314 struct MIMGDimInfo {
315   MIMGDim Dim;
316   uint8_t NumCoords;
317   uint8_t NumGradients;
318   bool MSAA;
319   bool DA;
320   uint8_t Encoding;
321   const char *AsmSuffix;
322 };
323 
324 LLVM_READONLY
325 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
326 
327 LLVM_READONLY
328 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
329 
330 LLVM_READONLY
331 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
332 
333 struct MIMGLZMappingInfo {
334   MIMGBaseOpcode L;
335   MIMGBaseOpcode LZ;
336 };
337 
338 struct MIMGMIPMappingInfo {
339   MIMGBaseOpcode MIP;
340   MIMGBaseOpcode NONMIP;
341 };
342 
343 struct MIMGBiasMappingInfo {
344   MIMGBaseOpcode Bias;
345   MIMGBaseOpcode NoBias;
346 };
347 
348 struct MIMGOffsetMappingInfo {
349   MIMGBaseOpcode Offset;
350   MIMGBaseOpcode NoOffset;
351 };
352 
353 struct MIMGG16MappingInfo {
354   MIMGBaseOpcode G;
355   MIMGBaseOpcode G16;
356 };
357 
358 LLVM_READONLY
359 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
360 
361 LLVM_READONLY
362 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
363 
364 LLVM_READONLY
365 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
366 
367 LLVM_READONLY
368 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
369 
370 LLVM_READONLY
371 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
372 
373 LLVM_READONLY
374 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
375                   unsigned VDataDwords, unsigned VAddrDwords);
376 
377 LLVM_READONLY
378 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
379 
380 LLVM_READONLY
381 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
382                            const MIMGDimInfo *Dim, bool IsA16,
383                            bool IsG16Supported);
384 
/// Describes a concrete MIMG opcode; returned by getMIMGInfo(). The fields
/// other than \c Opcode mirror the parameters of getMIMGOpcode().
///
/// NOTE(review): presumably instances come from TableGen-generated tables, in
/// which case the field order must not change -- confirm before reordering.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};
392 
393 LLVM_READONLY
394 const MIMGInfo *getMIMGInfo(unsigned Opc);
395 
396 LLVM_READONLY
397 int getMTBUFBaseOpcode(unsigned Opc);
398 
399 LLVM_READONLY
400 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
401 
402 LLVM_READONLY
403 int getMTBUFElements(unsigned Opc);
404 
405 LLVM_READONLY
406 bool getMTBUFHasVAddr(unsigned Opc);
407 
408 LLVM_READONLY
409 bool getMTBUFHasSrsrc(unsigned Opc);
410 
411 LLVM_READONLY
412 bool getMTBUFHasSoffset(unsigned Opc);
413 
414 LLVM_READONLY
415 int getMUBUFBaseOpcode(unsigned Opc);
416 
417 LLVM_READONLY
418 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
419 
420 LLVM_READONLY
421 int getMUBUFElements(unsigned Opc);
422 
423 LLVM_READONLY
424 bool getMUBUFHasVAddr(unsigned Opc);
425 
426 LLVM_READONLY
427 bool getMUBUFHasSrsrc(unsigned Opc);
428 
429 LLVM_READONLY
430 bool getMUBUFHasSoffset(unsigned Opc);
431 
432 LLVM_READONLY
433 bool getMUBUFIsBufferInv(unsigned Opc);
434 
435 LLVM_READONLY
436 bool getSMEMIsBuffer(unsigned Opc);
437 
438 LLVM_READONLY
439 bool getVOP1IsSingle(unsigned Opc);
440 
441 LLVM_READONLY
442 bool getVOP2IsSingle(unsigned Opc);
443 
444 LLVM_READONLY
445 bool getVOP3IsSingle(unsigned Opc);
446 
447 LLVM_READONLY
448 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
449                                                   uint8_t NumComponents,
450                                                   uint8_t NumFormat,
451                                                   const MCSubtargetInfo &STI);
452 LLVM_READONLY
453 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
454                                                   const MCSubtargetInfo &STI);
455 
456 LLVM_READONLY
457 int getMCOpcode(uint16_t Opcode, unsigned Gen);
458 
459 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
460                                const MCSubtargetInfo *STI);
461 
462 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
463     const MCSubtargetInfo *STI);
464 
465 bool isGroupSegment(const GlobalValue *GV);
466 bool isGlobalSegment(const GlobalValue *GV);
467 bool isReadOnlySegment(const GlobalValue *GV);
468 
469 /// \returns True if constants should be emitted to .text section for given
470 /// target triple \p TT, false otherwise.
471 bool shouldEmitConstantsToTextSection(const Triple &TT);
472 
473 /// \returns Integer value requested using \p F's \p Name attribute.
474 ///
475 /// \returns \p Default if attribute is not present.
476 ///
477 /// \returns \p Default and emits error if requested value cannot be converted
478 /// to integer.
479 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
480 
481 /// \returns A pair of integer values requested using \p F's \p Name attribute
482 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
483 /// is false).
484 ///
485 /// \returns \p Default if attribute is not present.
486 ///
487 /// \returns \p Default and emits error if one of the requested values cannot be
488 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
489 /// not present.
490 std::pair<int, int> getIntegerPairAttribute(const Function &F,
491                                             StringRef Name,
492                                             std::pair<int, int> Default,
493                                             bool OnlyFirstRequired = false);
494 
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits. Every counter defaults to ~0u, which the
/// has*() predicates treat as "no wait".
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt set to zero. VsCnt is
  /// zero when \p HasVscnt is true and ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt set to zero and VsCnt
  /// left at ~0u.
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns True if any of the four counters differs from ~0u.
  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  /// \returns True if VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  /// \returns True if every counter of this wait is less than or equal to the
  /// corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  /// \returns The counter-wise minimum of this wait and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
537 
538 /// \returns Vmcnt bit mask for given isa \p Version.
539 unsigned getVmcntBitMask(const IsaVersion &Version);
540 
541 /// \returns Expcnt bit mask for given isa \p Version.
542 unsigned getExpcntBitMask(const IsaVersion &Version);
543 
544 /// \returns Lgkmcnt bit mask for given isa \p Version.
545 unsigned getLgkmcntBitMask(const IsaVersion &Version);
546 
547 /// \returns Waitcnt bit mask for given isa \p Version.
548 unsigned getWaitcntBitMask(const IsaVersion &Version);
549 
550 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
551 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
552 
553 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
554 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
555 
556 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
557 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
558 
559 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
560 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
561 /// \p Lgkmcnt respectively.
562 ///
563 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
564 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
565 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
566 ///     \p Expcnt = \p Waitcnt[6:4]
567 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
568 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
569 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
570                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
571 
572 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
573 
574 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
575 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
576                      unsigned Vmcnt);
577 
578 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
579 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
580                       unsigned Expcnt);
581 
582 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
583 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
584                        unsigned Lgkmcnt);
585 
586 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
587 /// \p Version.
588 ///
589 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
590 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
591 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
592 ///     Waitcnt[6:4]   = \p Expcnt
593 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
594 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
595 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
596 ///
597 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
598 /// isa \p Version.
599 unsigned encodeWaitcnt(const IsaVersion &Version,
600                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
601 
602 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
603 
604 namespace Hwreg {
605 
606 LLVM_READONLY
607 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
608 
609 LLVM_READNONE
610 bool isValidHwreg(int64_t Id);
611 
612 LLVM_READNONE
613 bool isValidHwregOffset(int64_t Offset);
614 
615 LLVM_READNONE
616 bool isValidHwregWidth(int64_t Width);
617 
618 LLVM_READNONE
619 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
620 
621 LLVM_READNONE
622 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
623 
624 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
625 
626 } // namespace Hwreg
627 
628 namespace Exp {
629 
630 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
631 
632 LLVM_READONLY
633 unsigned getTgtId(const StringRef Name);
634 
635 LLVM_READNONE
636 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
637 
638 } // namespace Exp
639 
640 namespace MTBUFFormat {
641 
642 LLVM_READNONE
643 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
644 
645 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
646 
647 int64_t getDfmt(const StringRef Name);
648 
649 StringRef getDfmtName(unsigned Id);
650 
651 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
652 
653 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
654 
655 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
656 
657 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
658 
659 int64_t getUnifiedFormat(const StringRef Name);
660 
661 StringRef getUnifiedFormatName(unsigned Id);
662 
663 bool isValidUnifiedFormat(unsigned Val);
664 
665 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
666 
667 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
668 
669 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
670 
671 } // namespace MTBUFFormat
672 
673 namespace SendMsg {
674 
675 LLVM_READONLY
676 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
677 
678 LLVM_READONLY
679 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
680 
681 LLVM_READNONE
682 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
683 
684 LLVM_READNONE
685 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
686 
687 LLVM_READNONE
688 bool isValidMsgId(int64_t MsgId);
689 
690 LLVM_READNONE
691 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
692                   bool Strict = true);
693 
694 LLVM_READNONE
695 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
696                       const MCSubtargetInfo &STI, bool Strict = true);
697 
698 LLVM_READNONE
699 bool msgRequiresOp(int64_t MsgId);
700 
701 LLVM_READNONE
702 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
703 
704 void decodeMsg(unsigned Val,
705                uint16_t &MsgId,
706                uint16_t &OpId,
707                uint16_t &StreamId);
708 
709 LLVM_READNONE
710 uint64_t encodeMsg(uint64_t MsgId,
711                    uint64_t OpId,
712                    uint64_t StreamId);
713 
714 } // namespace SendMsg
715 
716 
717 unsigned getInitialPSInputAddr(const Function &F);
718 
719 bool getHasColorExport(const Function &F);
720 
721 bool getHasDepthExport(const Function &F);
722 
723 LLVM_READNONE
724 bool isShader(CallingConv::ID CC);
725 
726 LLVM_READNONE
727 bool isGraphics(CallingConv::ID CC);
728 
729 LLVM_READNONE
730 bool isCompute(CallingConv::ID CC);
731 
732 LLVM_READNONE
733 bool isEntryFunctionCC(CallingConv::ID CC);
734 
735 // These functions are considered entrypoints into the current module, i.e. they
736 // are allowed to be called from outside the current module. This is different
737 // from isEntryFunctionCC, which is only true for functions that are entered by
738 // the hardware. Module entry points include all entry functions but also
739 // include functions that can be called from other functions inside or outside
740 // the current module. Module entry functions are allowed to allocate LDS.
741 LLVM_READNONE
742 bool isModuleEntryFunctionCC(CallingConv::ID CC);
743 
744 bool isKernelCC(const Function *Func);
745 
746 // FIXME: Remove this when calling conventions cleaned up
747 LLVM_READNONE
748 inline bool isKernel(CallingConv::ID CC) {
749   switch (CC) {
750   case CallingConv::AMDGPU_KERNEL:
751   case CallingConv::SPIR_KERNEL:
752     return true;
753   default:
754     return false;
755   }
756 }
757 
758 bool hasXNACK(const MCSubtargetInfo &STI);
759 bool hasSRAMECC(const MCSubtargetInfo &STI);
760 bool hasMIMG_R128(const MCSubtargetInfo &STI);
761 bool hasGFX10A16(const MCSubtargetInfo &STI);
762 bool hasG16(const MCSubtargetInfo &STI);
763 bool hasPackedD16(const MCSubtargetInfo &STI);
764 
765 bool isSI(const MCSubtargetInfo &STI);
766 bool isCI(const MCSubtargetInfo &STI);
767 bool isVI(const MCSubtargetInfo &STI);
768 bool isGFX9(const MCSubtargetInfo &STI);
769 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
770 bool isGFX8Plus(const MCSubtargetInfo &STI);
771 bool isGFX9Plus(const MCSubtargetInfo &STI);
772 bool isGFX10(const MCSubtargetInfo &STI);
773 bool isGFX10Plus(const MCSubtargetInfo &STI);
774 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
775 bool isGFX10Before1030(const MCSubtargetInfo &STI);
776 bool isGCN3Encoding(const MCSubtargetInfo &STI);
777 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
778 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
779 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
780 bool isGFX90A(const MCSubtargetInfo &STI);
781 bool isGFX940(const MCSubtargetInfo &STI);
782 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
783 bool hasMAIInsts(const MCSubtargetInfo &STI);
784 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
785 
786 /// Is Reg - scalar register
787 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
788 
789 /// If \p Reg is a pseudo reg, return the correct hardware register given
790 /// \p STI otherwise return \p Reg.
791 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
792 
793 /// Convert hardware register \p Reg to a pseudo register
794 LLVM_READNONE
795 unsigned mc2PseudoReg(unsigned Reg);
796 
797 /// Can this operand also contain immediate values?
798 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
799 
800 /// Is this floating-point operand?
801 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
802 
803 /// Does this operand support only inlinable literals?
804 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
805 
806 /// Get the size in bits of a register from the register class \p RC.
807 unsigned getRegBitWidth(unsigned RCID);
808 
809 /// Get the size in bits of a register from the register class \p RC.
810 unsigned getRegBitWidth(const MCRegisterClass &RC);
811 
812 /// Get size of register operand
813 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
814                            unsigned OpNo);
815 
816 LLVM_READNONE
817 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
818   switch (OpInfo.OperandType) {
819   case AMDGPU::OPERAND_REG_IMM_INT32:
820   case AMDGPU::OPERAND_REG_IMM_FP32:
821   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
822   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
823   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
824   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
825   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
826   case AMDGPU::OPERAND_REG_IMM_V2INT32:
827   case AMDGPU::OPERAND_REG_IMM_V2FP32:
828   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
829   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
830   case AMDGPU::OPERAND_KIMM32:
831   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
832     return 4;
833 
834   case AMDGPU::OPERAND_REG_IMM_INT64:
835   case AMDGPU::OPERAND_REG_IMM_FP64:
836   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
837   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
838   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
839     return 8;
840 
841   case AMDGPU::OPERAND_REG_IMM_INT16:
842   case AMDGPU::OPERAND_REG_IMM_FP16:
843   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
844   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
845   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
846   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
847   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
848   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
849   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
850   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
851   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
852   case AMDGPU::OPERAND_REG_IMM_V2INT16:
853   case AMDGPU::OPERAND_REG_IMM_V2FP16:
854     return 2;
855 
856   default:
857     llvm_unreachable("unhandled operand type");
858   }
859 }
860 
861 LLVM_READNONE
862 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
863   return getOperandSize(Desc.OpInfo[OpNo]);
864 }
865 
/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
///
/// \returns True if \p Literal lies in the inclusive range [-16, 64].
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
872 
873 /// Is this literal inlinable
874 LLVM_READNONE
875 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
876 
877 LLVM_READNONE
878 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
879 
880 LLVM_READNONE
881 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
882 
883 LLVM_READNONE
884 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
885 
886 LLVM_READNONE
887 bool isInlinableIntLiteralV216(int32_t Literal);
888 
889 LLVM_READNONE
890 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
891 
892 bool isArgPassedInSGPR(const Argument *Arg);
893 
894 LLVM_READONLY
895 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
896                                       int64_t EncodedOffset);
897 
898 LLVM_READONLY
899 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
900                                     int64_t EncodedOffset,
901                                     bool IsBuffer);
902 
903 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
904 /// offsets.
905 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
906 
/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10,
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
911 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
912                                        int64_t ByteOffset, bool IsBuffer);
913 
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
916 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
917                                                 int64_t ByteOffset);
918 
919 /// For FLAT segment the offset must be positive;
920 /// MSB is ignored and forced to zero.
921 ///
922 /// \return The number of bits available for the offset field in flat
923 /// instructions.
924 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
925 
926 /// \returns true if this offset is small enough to fit in the SMRD
927 /// offset field.  \p ByteOffset should be the offset in bytes and
928 /// not the encoded offset.
929 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
930 
931 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
932                       const GCNSubtarget *Subtarget,
933                       Align Alignment = Align(4));
934 
935 LLVM_READNONE
936 inline bool isLegal64BitDPPControl(unsigned DC) {
937   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
938 }
939 
940 /// \returns true if the intrinsic is divergent
941 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
942 
943 // Track defaults for fields in the MODE register.
944 struct SIModeRegisterDefaults {
945   /// Floating point opcodes that support exception flag gathering quiet and
946   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
947   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
948   /// quieting.
949   bool IEEE : 1;
950 
951   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
952   /// clamp NaN to zero; otherwise, pass NaN through.
953   bool DX10Clamp : 1;
954 
955   /// If this is set, neither input or output denormals are flushed for most f32
956   /// instructions.
957   bool FP32InputDenormals : 1;
958   bool FP32OutputDenormals : 1;
959 
960   /// If this is set, neither input or output denormals are flushed for both f64
961   /// and f16/v2f16 instructions.
962   bool FP64FP16InputDenormals : 1;
963   bool FP64FP16OutputDenormals : 1;
964 
965   SIModeRegisterDefaults() :
966     IEEE(true),
967     DX10Clamp(true),
968     FP32InputDenormals(true),
969     FP32OutputDenormals(true),
970     FP64FP16InputDenormals(true),
971     FP64FP16OutputDenormals(true) {}
972 
973   SIModeRegisterDefaults(const Function &F);
974 
975   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
976     SIModeRegisterDefaults Mode;
977     Mode.IEEE = !AMDGPU::isShader(CC);
978     return Mode;
979   }
980 
981   bool operator ==(const SIModeRegisterDefaults Other) const {
982     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
983            FP32InputDenormals == Other.FP32InputDenormals &&
984            FP32OutputDenormals == Other.FP32OutputDenormals &&
985            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
986            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
987   }
988 
989   bool allFP32Denormals() const {
990     return FP32InputDenormals && FP32OutputDenormals;
991   }
992 
993   bool allFP64FP16Denormals() const {
994     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
995   }
996 
997   /// Get the encoding value for the FP_DENORM bits of the mode register for the
998   /// FP32 denormal mode.
999   uint32_t fpDenormModeSPValue() const {
1000     if (FP32InputDenormals && FP32OutputDenormals)
1001       return FP_DENORM_FLUSH_NONE;
1002     if (FP32InputDenormals)
1003       return FP_DENORM_FLUSH_OUT;
1004     if (FP32OutputDenormals)
1005       return FP_DENORM_FLUSH_IN;
1006     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1007   }
1008 
1009   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1010   /// FP64/FP16 denormal mode.
1011   uint32_t fpDenormModeDPValue() const {
1012     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
1013       return FP_DENORM_FLUSH_NONE;
1014     if (FP64FP16InputDenormals)
1015       return FP_DENORM_FLUSH_OUT;
1016     if (FP64FP16OutputDenormals)
1017       return FP_DENORM_FLUSH_IN;
1018     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1019   }
1020 
1021   /// Returns true if a flag is compatible if it's enabled in the callee, but
1022   /// disabled in the caller.
1023   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1024     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1025   }
1026 
1027   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1028   // be able to override.
1029   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1030     if (DX10Clamp != CalleeMode.DX10Clamp)
1031       return false;
1032     if (IEEE != CalleeMode.IEEE)
1033       return false;
1034 
1035     // Allow inlining denormals enabled into denormals flushed functions.
1036     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1037            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1038            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1039            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1040   }
1041 };
1042 
1043 } // end namespace AMDGPU
1044 
1045 raw_ostream &operator<<(raw_ostream &OS,
1046                         const AMDGPU::IsaInfo::TargetIDSetting S);
1047 
1048 } // end namespace llvm
1049 
1050 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1051