1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15 
16 struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
20 struct Align;
21 class Argument;
22 class Function;
23 class GCNSubtarget;
24 class GlobalValue;
25 class MCRegisterClass;
26 class MCRegisterInfo;
27 class MCSubtargetInfo;
28 class StringRef;
29 class Triple;
30 
31 namespace amdhsa {
32 struct kernel_descriptor_t;
33 }
34 
35 namespace AMDGPU {
36 
37 struct IsaVersion;
38 
/// \returns HSA OS ABI Version identification for the subtarget \p STI.
/// NOTE(review): the result is wrapped in an Optional, so presumably there is
/// a case in which no version applies — confirm against the definition.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);

/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
/// NOTE(review): the unit of the offset (presumably bytes) is not stated in
/// this header — confirm against the definition.
unsigned getHostcallImplicitArgPosition();

/// \returns The offset of the heap ptr argument from implicitarg_ptr
unsigned getHeapPtrImplicitArgPosition();

/// \returns The offset of the queue ptr argument from implicitarg_ptr
unsigned getQueuePtrImplicitArgPosition();

/// \returns Code object version.
unsigned getAmdhsaCodeObjectVersion();
68 
/// Describes one GCN buffer format entry.
///
/// The fields line up with the lookup keys of the getGcnBufferFormatInfo
/// overloads declared in this header: one overload searches by \c Format, the
/// other by \c BitsPerComp, \c NumComponents and \c NumFormat.
/// NOTE(review): presumably instances come from a generated table, so the
/// field order should not be changed without checking the generator.
struct GcnBufferFormatInfo {
  unsigned Format;        // Format identifier (lookup key).
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric format identifier.
  unsigned DataFormat;    // Data format identifier.
};
76 
77 #define GET_MIMGBaseOpcode_DECL
78 #define GET_MIMGDim_DECL
79 #define GET_MIMGEncoding_DECL
80 #define GET_MIMGLZMapping_DECL
81 #define GET_MIMGMIPMapping_DECL
82 #define GET_MIMGBiASMapping_DECL
83 #include "AMDGPUGenSearchableTables.inc"
84 
85 namespace IsaInfo {
86 
/// Fixed SGPR counts used by the IsaInfo helpers.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs reserved for the trap
  // handler — confirm against the uses of this constant.
  TRAP_NUM_SGPRS = 16
};
93 
/// State of a single target-ID feature. AMDGPUTargetID keeps one value of this
/// type for xnack and one for sramecc.
enum class TargetIDSetting {
  Unsupported, // The feature is reported as not supported.
  Any,         // Neither explicitly "On" nor explicitly "Off".
  Off,         // Explicitly "Off".
  On           // Explicitly "On".
};
100 
101 class AMDGPUTargetID {
102 private:
103   const MCSubtargetInfo &STI;
104   TargetIDSetting XnackSetting;
105   TargetIDSetting SramEccSetting;
106 
107 public:
108   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
109   ~AMDGPUTargetID() = default;
110 
111   /// \return True if the current xnack setting is not "Unsupported".
112   bool isXnackSupported() const {
113     return XnackSetting != TargetIDSetting::Unsupported;
114   }
115 
116   /// \returns True if the current xnack setting is "On" or "Any".
117   bool isXnackOnOrAny() const {
118     return XnackSetting == TargetIDSetting::On ||
119         XnackSetting == TargetIDSetting::Any;
120   }
121 
122   /// \returns True if current xnack setting is "On" or "Off",
123   /// false otherwise.
124   bool isXnackOnOrOff() const {
125     return getXnackSetting() == TargetIDSetting::On ||
126         getXnackSetting() == TargetIDSetting::Off;
127   }
128 
129   /// \returns The current xnack TargetIDSetting, possible options are
130   /// "Unsupported", "Any", "Off", and "On".
131   TargetIDSetting getXnackSetting() const {
132     return XnackSetting;
133   }
134 
135   /// Sets xnack setting to \p NewXnackSetting.
136   void setXnackSetting(TargetIDSetting NewXnackSetting) {
137     XnackSetting = NewXnackSetting;
138   }
139 
140   /// \return True if the current sramecc setting is not "Unsupported".
141   bool isSramEccSupported() const {
142     return SramEccSetting != TargetIDSetting::Unsupported;
143   }
144 
145   /// \returns True if the current sramecc setting is "On" or "Any".
146   bool isSramEccOnOrAny() const {
147   return SramEccSetting == TargetIDSetting::On ||
148       SramEccSetting == TargetIDSetting::Any;
149   }
150 
151   /// \returns True if current sramecc setting is "On" or "Off",
152   /// false otherwise.
153   bool isSramEccOnOrOff() const {
154     return getSramEccSetting() == TargetIDSetting::On ||
155         getSramEccSetting() == TargetIDSetting::Off;
156   }
157 
158   /// \returns The current sramecc TargetIDSetting, possible options are
159   /// "Unsupported", "Any", "Off", and "On".
160   TargetIDSetting getSramEccSetting() const {
161     return SramEccSetting;
162   }
163 
164   /// Sets sramecc setting to \p NewSramEccSetting.
165   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
166     SramEccSetting = NewSramEccSetting;
167   }
168 
169   void setTargetIDFromFeaturesString(StringRef FS);
170   void setTargetIDFromTargetIDStream(StringRef TargetID);
171 
172   /// \returns String representation of an object.
173   std::string toString() const;
174 };
175 
/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given \p
/// FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
/// NOTE(review): \p Addressable presumably caps the result at the addressable
/// SGPR count (see getAddressableNumSGPRs) — confirm against the definition.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
/// \p EnableWavefrontSize32 defaults to None. NOTE(review): presumably None
/// means the wavefront size is taken from \p STI — confirm.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
/// \p EnableWavefrontSize32 defaults to None. NOTE(review): presumably None
/// means the wavefront size is taken from \p STI — confirm.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
279 
280 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
281 /// \p NumVGPRs are used.
282 ///
283 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
284 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
285 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
286                           Optional<bool> EnableWavefrontSize32 = None);
287 
288 } // end namespace IsaInfo
289 
/// NOTE(review): presumably returns the index of the operand named by
/// \p NamedIdx within instruction \p Opcode; the signed return type suggests a
/// negative "not present" value — confirm against the definition.
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

/// NOTE(review): presumably maps the SOPP instruction \p Opcode to its
/// relaxed variant; the value returned when there is none is not visible
/// here — confirm against the definition.
LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);
295 
/// Properties recorded for one MIMG base opcode; entries are obtained through
/// getMIMGBaseOpcode / getMIMGBaseOpcodeInfo.
/// NOTE(review): field meanings are inferred from their names, and the struct
/// presumably mirrors a generated table, so the field order should not be
/// changed without checking the generator — confirm.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode; // Identifier of the base opcode.
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};
313 
/// NOTE(review): presumably looks up the MIMGBaseOpcodeInfo for the MIMG
/// instruction opcode \p Opc, returning null when there is no entry — confirm
/// against the definition.
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);

/// NOTE(review): presumably looks up the MIMGBaseOpcodeInfo keyed by the base
/// opcode value \p BaseOpcode — confirm against the definition.
LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
319 
/// Properties recorded for one MIMG dimension. The lookups declared in this
/// header find entries by dimension enum value, by encoding, or by assembler
/// suffix.
/// NOTE(review): presumably mirrors a generated table, so the field order
/// should not be changed without checking the generator — confirm.
struct MIMGDimInfo {
  MIMGDim Dim;           // Identifier of the dimension.
  uint8_t NumCoords;     // Number of coordinates.
  uint8_t NumGradients;  // Number of gradients.
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Key used by getMIMGDimInfoByEncoding.
  const char *AsmSuffix; // Key used by getMIMGDimInfoByAsmSuffix.
};
329 
// NOTE(review): the three lookups below presumably return null when no entry
// matches; the definitions are not visible here — confirm.

/// Looks up the MIMGDimInfo entry for the dimension enum value \p DimEnum.
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

/// Looks up the MIMGDimInfo entry whose encoding is \p DimEnc.
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

/// Looks up the MIMGDimInfo entry whose assembler suffix is \p AsmSuffix.
LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
338 
// Each struct below pairs two MIMG base opcodes. NOTE(review): presumably the
// first member is the lookup key and the second is the related variant, and
// the layouts mirror generated tables — confirm against the definitions of
// the corresponding getMIMG*MappingInfo functions.

/// Pairs an "L" base opcode with its "LZ" counterpart.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

/// Pairs a "MIP" base opcode with its non-MIP counterpart.
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

/// Pairs a base opcode that takes a bias with its no-bias counterpart.
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};

/// Pairs a base opcode that takes an offset with its no-offset counterpart.
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};

/// Pairs a "G" base opcode with its "G16" counterpart.
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
363 
// NOTE(review): the descriptions in this group are inferred from the
// declarations only; the definitions (and the values returned when a lookup
// fails) are not visible here — confirm before relying on them.

/// Looks up the L/LZ mapping entry keyed by \p L.
LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

/// Looks up the MIP/non-MIP mapping entry keyed by \p MIP.
LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

/// Looks up the bias/no-bias mapping entry keyed by \p Bias.
LLVM_READONLY
const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);

/// Looks up the offset/no-offset mapping entry keyed by \p Offset.
LLVM_READONLY
const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);

/// Looks up the G/G16 mapping entry keyed by \p G.
LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

/// Finds the MIMG opcode for the combination of \p BaseOpcode,
/// \p MIMGEncoding, \p VDataDwords and \p VAddrDwords.
LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

/// Finds the variant of MIMG opcode \p Opc that uses \p NewChannels channels.
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

/// Computes the address size for the given \p BaseOpcode and \p Dim, taking
/// \p IsA16 and \p IsG16Supported into account.
LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);
390 
/// Describes one concrete MIMG instruction; entries are obtained through
/// getMIMGInfo. The members other than \c Opcode carry the same names as the
/// parameters of getMIMGOpcode.
/// NOTE(review): presumably mirrors a generated table, so the field order
/// should not be changed without checking the generator — confirm.
struct MIMGInfo {
  uint16_t Opcode;      // Instruction opcode.
  uint16_t BaseOpcode;  // Base opcode value.
  uint8_t MIMGEncoding; // Encoding identifier.
  uint8_t VDataDwords;  // Number of vdata dwords.
  uint8_t VAddrDwords;  // Number of vaddr dwords.
};
398 
// NOTE(review): the descriptions in this group are inferred from the
// declaration names only; the definitions (and the values returned for
// opcodes without an entry) are not visible here — confirm before relying on
// them.

/// Looks up the MIMGInfo entry for opcode \p Opc.
LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

/// Maps MTBUF opcode \p Opc to its base opcode.
LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

/// Finds the MTBUF opcode for \p BaseOpc with \p Elements elements.
LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

/// Reports the element count of MTBUF opcode \p Opc.
LLVM_READONLY
int getMTBUFElements(unsigned Opc);

/// Reports whether MTBUF opcode \p Opc has a vaddr operand.
LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

/// Reports whether MTBUF opcode \p Opc has an srsrc operand.
LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

/// Reports whether MTBUF opcode \p Opc has an soffset operand.
LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

/// Maps MUBUF opcode \p Opc to its base opcode.
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

/// Finds the MUBUF opcode for \p BaseOpc with \p Elements elements.
LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

/// Reports the element count of MUBUF opcode \p Opc.
LLVM_READONLY
int getMUBUFElements(unsigned Opc);

/// Reports whether MUBUF opcode \p Opc has a vaddr operand.
LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

/// Reports whether MUBUF opcode \p Opc has an srsrc operand.
LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

/// Reports whether MUBUF opcode \p Opc has an soffset operand.
LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

/// Reports whether MUBUF opcode \p Opc is a buffer invalidate instruction.
LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

/// Reports whether SMEM opcode \p Opc is a buffer instruction.
LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

/// Reports the "IsSingle" property of VOP1 opcode \p Opc.
LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

/// Reports the "IsSingle" property of VOP2 opcode \p Opc.
LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

/// Reports the "IsSingle" property of VOP3 opcode \p Opc.
LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);
452 
// NOTE(review): the descriptions in this group are inferred from the
// declarations only; the definitions are not visible here — confirm before
// relying on them.

/// Looks up the buffer format entry matching \p BitsPerComp,
/// \p NumComponents and \p NumFormat for subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
/// Looks up the buffer format entry whose format value is \p Format for
/// subtarget \p STI.
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

/// Maps the opcode \p Opcode to the MC opcode for generation \p Gen.
LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

/// Fills \p Header with default values for subtarget \p STI.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

/// \returns A kernel descriptor holding default values for subtarget \p STI.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);
470 
// NOTE(review): the three segment predicates below presumably classify \p GV
// by its address space; the definitions are not visible here — confirm.
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
/// is false).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
///
/// \p OnlyFirstRequired defaults to false, so by default both values must be
/// present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
500 
/// Counter values that an s_waitcnt instruction should wait for.
///
/// A counter holding a large value (up to and including the maximum unsigned
/// integer, which is the default for every counter) stands for a "don't care"
/// wait.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait on zero for VmCnt, ExpCnt and LgkmCnt; VsCnt is zero as
  /// well when \p HasVscnt is set and "don't care" otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt Result = allZeroExceptVsCnt();
    if (HasVscnt)
      Result.VsCnt = 0;
    return Result;
  }

  /// \returns A wait on zero for VmCnt, ExpCnt and LgkmCnt with VsCnt left as
  /// "don't care".
  static Waitcnt allZeroExceptVsCnt() {
    Waitcnt Result;
    Result.VmCnt = 0;
    Result.ExpCnt = 0;
    Result.LgkmCnt = 0;
    return Result;
  }

  /// \returns True if any of the four counters differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  /// \returns True if VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return !(VsCnt == ~0u); }

  /// \returns True if every counter of this wait is less than or equal to the
  /// matching counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt)
      return false;
    if (ExpCnt > Other.ExpCnt)
      return false;
    if (LgkmCnt > Other.LgkmCnt)
      return false;
    return VsCnt <= Other.VsCnt;
  }

  /// \returns A wait whose counters are the per-counter minimum of this wait
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged;
    Merged.VmCnt = std::min(VmCnt, Other.VmCnt);
    Merged.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Merged.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Merged.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Merged;
  }
};
543 
/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// Overload that decodes \p Encoded for given isa \p Version and returns the
/// counters as a Waitcnt struct.
/// NOTE(review): how the VsCnt member of the result is populated is not
/// visible in this header — confirm against the definition.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// Overload that encodes the counters held in \p Decoded for given isa
/// \p Version.
/// NOTE(review): whether and how the VsCnt member of \p Decoded takes part in
/// the encoding is not visible in this header — confirm against the
/// definition.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
609 
/// Helpers for hardware-register operands, which are described by an id, an
/// offset and a width.
/// NOTE(review): the per-function descriptions are inferred from the
/// declarations only; the definitions are not visible here — confirm before
/// relying on them.
namespace Hwreg {

/// Maps the register name \p Name to its id for subtarget \p STI.
LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

/// Reports whether \p Id is a valid hardware register id.
LLVM_READNONE
bool isValidHwreg(int64_t Id);

/// Reports whether \p Offset is a valid hardware register offset.
LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

/// Reports whether \p Width is a valid hardware register width.
LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

/// Packs \p Id, \p Offset and \p Width into a single encoded value.
LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

/// Maps the register id \p Id to its name for subtarget \p STI.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Splits the encoded value \p Val into \p Id, \p Offset and \p Width.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg
633 
/// Helpers for export targets.
/// NOTE(review): the per-function descriptions are inferred from the
/// declarations only; the definitions are not visible here — confirm before
/// relying on them.
namespace Exp {

/// Looks up the target with id \p Id and writes its name to \p Name and an
/// index to \p Index. The bool result presumably reports whether the id was
/// recognized.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

/// Maps the target name \p Name to its id.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

/// Reports whether the target id \p Id is supported on subtarget \p STI.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
645 
/// Helpers for MTBUF format operands: the split data format (dfmt) and numeric
/// format (nfmt) pair, and the unified format.
/// NOTE(review): the per-function descriptions are inferred from the
/// declarations only; the definitions (and the values returned for unknown
/// names or ids) are not visible here — confirm before relying on them.
namespace MTBUFFormat {

/// Packs \p Dfmt and \p Nfmt into a single format value.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Splits the format value \p Format into \p Dfmt and \p Nfmt.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

/// Maps the data format name \p Name to its id.
int64_t getDfmt(const StringRef Name);

/// Maps the data format id \p Id to its name.
StringRef getDfmtName(unsigned Id);

/// Maps the numeric format name \p Name to its id for subtarget \p STI.
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

/// Maps the numeric format id \p Id to its name for subtarget \p STI.
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

/// Reports whether \p Val is a valid combined dfmt/nfmt value for \p STI.
bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// Reports whether \p Val is a valid numeric format for \p STI.
bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// Maps the unified format name \p Name to its id.
int64_t getUnifiedFormat(const StringRef Name);

/// Maps the unified format id \p Id to its name.
StringRef getUnifiedFormatName(unsigned Id);

/// Reports whether \p Val is a valid unified format.
bool isValidUnifiedFormat(unsigned Val);

/// Converts the \p Dfmt / \p Nfmt pair to a unified format value.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

/// Reports whether \p Val is a valid format encoding for \p STI.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

/// \returns The default format encoding for \p STI.
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
678 
/// Helpers for sendmsg operands, which are described by a message id, an
/// operation id and a stream id.
/// NOTE(review): the per-function descriptions are inferred from the
/// declarations only; the definitions (including the exact effect of the
/// \p Strict flags, which default to true) are not visible here — confirm
/// before relying on them.
namespace SendMsg {

/// Maps the message name \p Name to its id.
LLVM_READONLY
int64_t getMsgId(const StringRef Name);

/// Maps the operation name \p Name to its id for the message \p MsgId.
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

/// Maps the message id \p MsgId to its name.
LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

/// Maps the operation id \p OpId of message \p MsgId to its name.
LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

/// Reports whether \p MsgId is a valid message id for \p STI.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

/// Reports whether \p OpId is a valid operation for message \p MsgId on
/// \p STI.
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

/// Reports whether \p StreamId is a valid stream for the given \p MsgId and
/// \p OpId on \p STI.
LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

/// Reports whether message \p MsgId requires an operation.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

/// Reports whether the given \p MsgId / \p OpId combination supports a
/// stream id.
LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

/// Splits the encoded value \p Val into \p MsgId, \p OpId and \p StreamId.
void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

/// Packs \p MsgId, \p OpId and \p StreamId into a single encoded value.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
721 
722 
// NOTE(review): the three queries below presumably read properties of \p F
// (likely function attributes); the definitions are not visible here —
// confirm.
unsigned getInitialPSInputAddr(const Function &F);

bool getHasColorExport(const Function &F);

bool getHasDepthExport(const Function &F);

// NOTE(review): the calling-convention predicates below are declared only;
// which calling conventions each one accepts is not visible in this header —
// confirm against the definitions.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// NOTE(review): presumably reports whether \p Func uses a kernel calling
// convention — confirm against the definition.
bool isKernelCC(const Function *Func);
751 
752 // FIXME: Remove this when calling conventions cleaned up
753 LLVM_READNONE
754 inline bool isKernel(CallingConv::ID CC) {
755   switch (CC) {
756   case CallingConv::AMDGPU_KERNEL:
757   case CallingConv::SPIR_KERNEL:
758     return true;
759   default:
760     return false;
761   }
762 }
763 
// Feature queries on the subtarget \p STI.
// NOTE(review): each presumably tests a subtarget feature bit matching its
// name; the definitions are not visible here — confirm.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

// Generation / encoding queries on the subtarget \p STI.
// NOTE(review): the exact generations covered by the combined predicates
// (e.g. isGFX9_GFX10, the *Plus variants) are not visible here — confirm.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
// NOTE(review): presumably combines the AGPR count \p ArgNumAGPR and the VGPR
// count \p ArgNumVGPR into a total, with \p has90AInsts selecting how they
// are combined — confirm against the definition.
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);

/// \returns True if \p Reg is a scalar register, using \p TRI for register
/// information.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can operand \p OpNo of \p Desc also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is operand \p OpNo of \p Desc a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does operand \p OpNo of \p Desc support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class with ID
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get size of register operand \p OpNo of \p Desc.
/// NOTE(review): the unit of the result is not stated in this header —
/// confirm against the definition.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);
818 
/// \returns The operand size selected by the OperandType of \p OpInfo: 4 for
/// the 32-bit operand types and the KIMM literals, 8 for the 64-bit operand
/// types, and 2 for the 16-bit operand types.
///
/// Judging by the 32-bit -> 4 and 64-bit -> 8 mapping the unit is bytes. Note
/// that the packed V2 types are grouped with their element width: V2INT16 /
/// V2FP16 yield 2 and V2INT32 / V2FP32 yield 4.
///
/// Any operand type not listed here reaches llvm_unreachable.
LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit operand types (including packed 2x32) and mandatory literals.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  // 64-bit operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  // 16-bit operand types (including packed 2x16).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
863 
864 LLVM_READNONE
865 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
866   return getOperandSize(Desc.OpInfo[OpNo]);
867 }
868 
869 /// Is this literal inlinable, and not one of the values intended for floating
870 /// point values.
871 LLVM_READNONE
872 inline bool isInlinableIntLiteral(int64_t Literal) {
873   return Literal >= -16 && Literal <= 64;
874 }
875 
876 /// Is this literal inlinable
877 LLVM_READNONE
878 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
879 
880 LLVM_READNONE
881 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
882 
883 LLVM_READNONE
884 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
885 
886 LLVM_READNONE
887 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
888 
889 LLVM_READNONE
890 bool isInlinableIntLiteralV216(int32_t Literal);
891 
892 LLVM_READNONE
893 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
894 
895 bool isArgPassedInSGPR(const Argument *Arg);
896 
/// NOTE(review): by its name, checks whether the already-encoded SMRD offset
/// \p EncodedOffset is legal as an unsigned offset on subtarget \p ST. The
/// accepted range is in the out-of-line definition -- confirm there.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// NOTE(review): by its name, the signed counterpart of
/// isLegalSMRDEncodedUnsignedOffset. \p IsBuffer presumably selects the
/// S_BUFFER form of the instruction -- confirm against the definition.
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);
905 
/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
/// NOTE(review): presumably \p ByteOffset is returned unchanged on subtargets
/// that use byte offsets -- confirm against the definition.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
/// NOTE(review): \p IsBuffer presumably selects the S_BUFFER rules described
/// above -- confirm against the definition.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
/// NOTE(review): the Optional return type suggests an empty result when no
/// such encoding exists -- confirm against the definition.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);
921 
/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the offset field in flat
/// instructions.
/// NOTE(review): \p Signed presumably selects whether the signed or the
/// unsigned width of the field is reported -- confirm against the definition.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
928 
/// \returns true if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset. The check is made for subtarget \p ST.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
933 
/// NOTE(review): by its name, splits the MUBUF offset \p Imm into the two
/// parts written to \p SOffset and \p ImmOffset (both are out-parameters taken
/// by non-const reference). \p Alignment defaults to Align(4). The meaning of
/// the bool result and the use made of \p Subtarget are not visible in this
/// header -- confirm against the definition.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));
937 
938 LLVM_READNONE
939 inline bool isLegal64BitDPPControl(unsigned DC) {
940   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
941 }
942 
/// \returns true if the intrinsic identified by \p IntrID is divergent, i.e.
/// it is a source of divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
945 
946 // Track defaults for fields in the MODE register.
947 struct SIModeRegisterDefaults {
948   /// Floating point opcodes that support exception flag gathering quiet and
949   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
950   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
951   /// quieting.
952   bool IEEE : 1;
953 
954   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
955   /// clamp NaN to zero; otherwise, pass NaN through.
956   bool DX10Clamp : 1;
957 
958   /// If this is set, neither input or output denormals are flushed for most f32
959   /// instructions.
960   bool FP32InputDenormals : 1;
961   bool FP32OutputDenormals : 1;
962 
963   /// If this is set, neither input or output denormals are flushed for both f64
964   /// and f16/v2f16 instructions.
965   bool FP64FP16InputDenormals : 1;
966   bool FP64FP16OutputDenormals : 1;
967 
968   SIModeRegisterDefaults() :
969     IEEE(true),
970     DX10Clamp(true),
971     FP32InputDenormals(true),
972     FP32OutputDenormals(true),
973     FP64FP16InputDenormals(true),
974     FP64FP16OutputDenormals(true) {}
975 
976   SIModeRegisterDefaults(const Function &F);
977 
978   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
979     SIModeRegisterDefaults Mode;
980     Mode.IEEE = !AMDGPU::isShader(CC);
981     return Mode;
982   }
983 
984   bool operator ==(const SIModeRegisterDefaults Other) const {
985     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
986            FP32InputDenormals == Other.FP32InputDenormals &&
987            FP32OutputDenormals == Other.FP32OutputDenormals &&
988            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
989            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
990   }
991 
992   bool allFP32Denormals() const {
993     return FP32InputDenormals && FP32OutputDenormals;
994   }
995 
996   bool allFP64FP16Denormals() const {
997     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
998   }
999 
1000   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1001   /// FP32 denormal mode.
1002   uint32_t fpDenormModeSPValue() const {
1003     if (FP32InputDenormals && FP32OutputDenormals)
1004       return FP_DENORM_FLUSH_NONE;
1005     if (FP32InputDenormals)
1006       return FP_DENORM_FLUSH_OUT;
1007     if (FP32OutputDenormals)
1008       return FP_DENORM_FLUSH_IN;
1009     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1010   }
1011 
1012   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1013   /// FP64/FP16 denormal mode.
1014   uint32_t fpDenormModeDPValue() const {
1015     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
1016       return FP_DENORM_FLUSH_NONE;
1017     if (FP64FP16InputDenormals)
1018       return FP_DENORM_FLUSH_OUT;
1019     if (FP64FP16OutputDenormals)
1020       return FP_DENORM_FLUSH_IN;
1021     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1022   }
1023 
1024   /// Returns true if a flag is compatible if it's enabled in the callee, but
1025   /// disabled in the caller.
1026   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1027     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1028   }
1029 
1030   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1031   // be able to override.
1032   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1033     if (DX10Clamp != CalleeMode.DX10Clamp)
1034       return false;
1035     if (IEEE != CalleeMode.IEEE)
1036       return false;
1037 
1038     // Allow inlining denormals enabled into denormals flushed functions.
1039     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1040            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1041            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1042            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1043   }
1044 };
1045 
1046 } // end namespace AMDGPU
1047 
/// Stream insertion operator for AMDGPU::IsaInfo::TargetIDSetting values. It
/// is declared in namespace llvm, outside of namespace AMDGPU.
/// NOTE(review): the textual form written to \p OS for \p S is produced by the
/// out-of-line definition -- see there.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1050 
1051 } // end namespace llvm
1052 
1053 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1054