1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
12
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
26
27 namespace llvm {
28
29 class Argument;
30 class AMDGPUSubtarget;
31 class FeatureBitset;
32 class Function;
33 class GCNSubtarget;
34 class GlobalValue;
35 class MCContext;
36 class MCRegisterClass;
37 class MCRegisterInfo;
38 class MCSection;
39 class MCSubtargetInfo;
40 class MachineMemOperand;
41 class Triple;
42
43 namespace AMDGPU {
44
45 #define GET_MIMGBaseOpcode_DECL
46 #define GET_MIMGDim_DECL
47 #define GET_MIMGEncoding_DECL
48 #define GET_MIMGLZMapping_DECL
49 #include "AMDGPUGenSearchableTables.inc"
50
51 namespace IsaInfo {
52
/// Fixed SGPR counts used by the ISA info queries.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};
59
60 /// Streams isa version string for given subtarget \p STI into \p Stream.
61 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
62
63 /// \returns True if given subtarget \p STI supports code object version 3,
64 /// false otherwise.
65 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
66
67 /// \returns Wavefront size for given subtarget \p STI.
68 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
69
70 /// \returns Local memory size in bytes for given subtarget \p STI.
71 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
72
73 /// \returns Number of execution units per compute unit for given subtarget \p
74 /// STI.
75 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
76
77 /// \returns Maximum number of work groups per compute unit for given subtarget
78 /// \p STI and limited by given \p FlatWorkGroupSize.
79 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
80 unsigned FlatWorkGroupSize);
81
82 /// \returns Maximum number of waves per compute unit for given subtarget \p
83 /// STI without any kind of limitation.
84 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
85
86 /// \returns Maximum number of waves per compute unit for given subtarget \p
87 /// STI and limited by given \p FlatWorkGroupSize.
88 unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
89 unsigned FlatWorkGroupSize);
90
91 /// \returns Minimum number of waves per execution unit for given subtarget \p
92 /// STI.
93 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
94
95 /// \returns Maximum number of waves per execution unit for given subtarget \p
96 /// STI without any kind of limitation.
97 unsigned getMaxWavesPerEU();
98
99 /// \returns Maximum number of waves per execution unit for given subtarget \p
100 /// STI and limited by given \p FlatWorkGroupSize.
101 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
102 unsigned FlatWorkGroupSize);
103
104 /// \returns Minimum flat work group size for given subtarget \p STI.
105 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
106
107 /// \returns Maximum flat work group size for given subtarget \p STI.
108 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
109
110 /// \returns Number of waves per work group for given subtarget \p STI and
111 /// limited by given \p FlatWorkGroupSize.
112 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
113 unsigned FlatWorkGroupSize);
114
115 /// \returns SGPR allocation granularity for given subtarget \p STI.
116 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
117
118 /// \returns SGPR encoding granularity for given subtarget \p STI.
119 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
120
121 /// \returns Total number of SGPRs for given subtarget \p STI.
122 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
123
124 /// \returns Addressable number of SGPRs for given subtarget \p STI.
125 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
126
127 /// \returns Minimum number of SGPRs that meets the given number of waves per
128 /// execution unit requirement for given subtarget \p STI.
129 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
130
131 /// \returns Maximum number of SGPRs that meets the given number of waves per
132 /// execution unit requirement for given subtarget \p STI.
133 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
134 bool Addressable);
135
136 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
137 /// STI when the given special registers are used.
138 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
139 bool FlatScrUsed, bool XNACKUsed);
140
141 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
142 /// STI when the given special registers are used. XNACK is inferred from
143 /// \p STI.
144 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
145 bool FlatScrUsed);
146
147 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
148 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
149 /// register counts.
150 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
151
152 /// \returns VGPR allocation granularity for given subtarget \p STI.
153 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
154
155 /// \returns VGPR encoding granularity for given subtarget \p STI.
156 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
157
158 /// \returns Total number of VGPRs for given subtarget \p STI.
159 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
160
161 /// \returns Addressable number of VGPRs for given subtarget \p STI.
162 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
163
164 /// \returns Minimum number of VGPRs that meets given number of waves per
165 /// execution unit requirement for given subtarget \p STI.
166 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
167
168 /// \returns Maximum number of VGPRs that meets given number of waves per
169 /// execution unit requirement for given subtarget \p STI.
170 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
171
172 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
173 /// \p NumVGPRs are used.
174 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
175
176 } // end namespace IsaInfo
177
178 LLVM_READONLY
179 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
180
181 struct MIMGBaseOpcodeInfo {
182 MIMGBaseOpcode BaseOpcode;
183 bool Store;
184 bool Atomic;
185 bool AtomicX2;
186 bool Sampler;
187 bool Gather4;
188
189 uint8_t NumExtraArgs;
190 bool Gradients;
191 bool Coordinates;
192 bool LodOrClampOrMip;
193 bool HasD16;
194 };
195
196 LLVM_READONLY
197 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
198
199 struct MIMGDimInfo {
200 MIMGDim Dim;
201 uint8_t NumCoords;
202 uint8_t NumGradients;
203 bool DA;
204 };
205
206 LLVM_READONLY
207 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
208
209 struct MIMGLZMappingInfo {
210 MIMGBaseOpcode L;
211 MIMGBaseOpcode LZ;
212 };
213
214 LLVM_READONLY
215 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
216
217 LLVM_READONLY
218 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
219 unsigned VDataDwords, unsigned VAddrDwords);
220
221 LLVM_READONLY
222 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
223
224 LLVM_READONLY
225 int getMUBUFBaseOpcode(unsigned Opc);
226
227 LLVM_READONLY
228 int getMUBUFOpcode(unsigned BaseOpc, unsigned Dwords);
229
230 LLVM_READONLY
231 int getMUBUFDwords(unsigned Opc);
232
233 LLVM_READONLY
234 bool getMUBUFHasVAddr(unsigned Opc);
235
236 LLVM_READONLY
237 bool getMUBUFHasSrsrc(unsigned Opc);
238
239 LLVM_READONLY
240 bool getMUBUFHasSoffset(unsigned Opc);
241
242 LLVM_READONLY
243 int getMCOpcode(uint16_t Opcode, unsigned Gen);
244
245 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
246 const MCSubtargetInfo *STI);
247
248 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
249
250 bool isGroupSegment(const GlobalValue *GV);
251 bool isGlobalSegment(const GlobalValue *GV);
252 bool isReadOnlySegment(const GlobalValue *GV);
253
254 /// \returns True if constants should be emitted to .text section for given
255 /// target triple \p TT, false otherwise.
256 bool shouldEmitConstantsToTextSection(const Triple &TT);
257
258 /// \returns Integer value requested using \p F's \p Name attribute.
259 ///
260 /// \returns \p Default if attribute is not present.
261 ///
262 /// \returns \p Default and emits error if requested value cannot be converted
263 /// to integer.
264 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
265
266 /// \returns A pair of integer values requested using \p F's \p Name attribute
267 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
268 /// is false).
269 ///
270 /// \returns \p Default if attribute is not present.
271 ///
272 /// \returns \p Default and emits error if one of the requested values cannot be
273 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
274 /// not present.
275 std::pair<int, int> getIntegerPairAttribute(const Function &F,
276 StringRef Name,
277 std::pair<int, int> Default,
278 bool OnlyFirstRequired = false);
279
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  // Every counter defaults to the maximum unsigned value, i.e. "don't care".
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;

  /// Constructs a wait in which every counter is "don't care".
  Waitcnt() = default;

  /// Constructs a wait with explicit values for all three counters.
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}

  /// \returns A wait with all three counters set to zero.
  static Waitcnt allZero() { return Waitcnt(0, 0, 0); }

  /// \returns True if every counter of this wait is less than or equal to the
  /// corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt;
  }

  /// \returns A wait whose counters are the per-counter minimum of this wait
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt));
  }
};
305
306 /// \returns Vmcnt bit mask for given isa \p Version.
307 unsigned getVmcntBitMask(const IsaVersion &Version);
308
309 /// \returns Expcnt bit mask for given isa \p Version.
310 unsigned getExpcntBitMask(const IsaVersion &Version);
311
312 /// \returns Lgkmcnt bit mask for given isa \p Version.
313 unsigned getLgkmcntBitMask(const IsaVersion &Version);
314
315 /// \returns Waitcnt bit mask for given isa \p Version.
316 unsigned getWaitcntBitMask(const IsaVersion &Version);
317
318 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
319 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
320
321 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
322 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
323
324 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
325 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
326
327 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
328 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
329 /// \p Lgkmcnt respectively.
330 ///
331 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
332 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
333 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
334 /// \p Expcnt = \p Waitcnt[6:4]
335 /// \p Lgkmcnt = \p Waitcnt[11:8]
336 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
337 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
338
339 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
340
341 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
342 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
343 unsigned Vmcnt);
344
345 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
346 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
347 unsigned Expcnt);
348
349 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
350 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
351 unsigned Lgkmcnt);
352
353 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
354 /// \p Version.
355 ///
356 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
357 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
358 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
359 /// Waitcnt[6:4] = \p Expcnt
360 /// Waitcnt[11:8] = \p Lgkmcnt
361 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
362 ///
363 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
364 /// isa \p Version.
365 unsigned encodeWaitcnt(const IsaVersion &Version,
366 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
367
368 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
369
370 unsigned getInitialPSInputAddr(const Function &F);
371
372 LLVM_READNONE
373 bool isShader(CallingConv::ID CC);
374
375 LLVM_READNONE
376 bool isCompute(CallingConv::ID CC);
377
378 LLVM_READNONE
379 bool isEntryFunctionCC(CallingConv::ID CC);
380
381 // FIXME: Remove this when calling conventions cleaned up
382 LLVM_READNONE
isKernel(CallingConv::ID CC)383 inline bool isKernel(CallingConv::ID CC) {
384 switch (CC) {
385 case CallingConv::AMDGPU_KERNEL:
386 case CallingConv::SPIR_KERNEL:
387 return true;
388 default:
389 return false;
390 }
391 }
392
393 bool hasXNACK(const MCSubtargetInfo &STI);
394 bool hasSRAMECC(const MCSubtargetInfo &STI);
395 bool hasMIMG_R128(const MCSubtargetInfo &STI);
396 bool hasPackedD16(const MCSubtargetInfo &STI);
397
398 bool isSI(const MCSubtargetInfo &STI);
399 bool isCI(const MCSubtargetInfo &STI);
400 bool isVI(const MCSubtargetInfo &STI);
401 bool isGFX9(const MCSubtargetInfo &STI);
402
403 /// Is Reg - scalar register
404 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
405
406 /// Is there any intersection between registers
407 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
408
409 /// If \p Reg is a pseudo reg, return the correct hardware register given
410 /// \p STI otherwise return \p Reg.
411 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
412
413 /// Convert hardware register \p Reg to a pseudo register
414 LLVM_READNONE
415 unsigned mc2PseudoReg(unsigned Reg);
416
417 /// Can this operand also contain immediate values?
418 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
419
420 /// Is this floating-point operand?
421 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
422
423 /// Does this opearnd support only inlinable literals?
424 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
425
426 /// Get the size in bits of a register from the register class \p RC.
427 unsigned getRegBitWidth(unsigned RCID);
428
429 /// Get the size in bits of a register from the register class \p RC.
430 unsigned getRegBitWidth(const MCRegisterClass &RC);
431
432 /// Get size of register operand
433 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
434 unsigned OpNo);
435
436 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)437 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
438 switch (OpInfo.OperandType) {
439 case AMDGPU::OPERAND_REG_IMM_INT32:
440 case AMDGPU::OPERAND_REG_IMM_FP32:
441 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
442 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
443 return 4;
444
445 case AMDGPU::OPERAND_REG_IMM_INT64:
446 case AMDGPU::OPERAND_REG_IMM_FP64:
447 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
448 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
449 return 8;
450
451 case AMDGPU::OPERAND_REG_IMM_INT16:
452 case AMDGPU::OPERAND_REG_IMM_FP16:
453 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
454 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
455 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
456 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
457 return 2;
458
459 default:
460 llvm_unreachable("unhandled operand type");
461 }
462 }
463
464 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)465 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
466 return getOperandSize(Desc.OpInfo[OpNo]);
467 }
468
469 /// Is this literal inlinable
470 LLVM_READNONE
471 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
472
473 LLVM_READNONE
474 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
475
476 LLVM_READNONE
477 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
478
479 LLVM_READNONE
480 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
481
482 bool isArgPassedInSGPR(const Argument *Arg);
483
484 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
485 /// offset field.
486 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
487
488 /// \returns true if this offset is small enough to fit in the SMRD
489 /// offset field. \p ByteOffset should be the offset in bytes and
490 /// not the encoded offset.
491 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
492
493 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
494 const GCNSubtarget *Subtarget, uint32_t Align = 4);
495
496 /// \returns true if the intrinsic is divergent
497 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
498
499 } // end namespace AMDGPU
500 } // end namespace llvm
501
502 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
503