1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15
16 struct amd_kernel_code_t;
17
18 namespace llvm {
19
20 struct Align;
21 class Argument;
22 class Function;
23 class GCNSubtarget;
24 class GlobalValue;
25 class MCRegisterClass;
26 class MCRegisterInfo;
27 class MCSubtargetInfo;
28 class StringRef;
29 class Triple;
30
31 namespace amdhsa {
32 struct kernel_descriptor_t;
33 }
34
35 namespace AMDGPU {
36
37 struct IsaVersion;
38
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 4,
48 /// false otherwise.
49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 5,
51 /// false otherwise.
52 bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
53 /// \returns True if HSA OS ABI Version identification is 3 and above,
54 /// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
56
57 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
58 unsigned getMultigridSyncArgImplicitArgPosition();
59
60 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
61 unsigned getHostcallImplicitArgPosition();
62
63 /// \returns Code object version.
64 unsigned getAmdhsaCodeObjectVersion();
65
/// Description of a single buffer format. Pointers to entries are returned by
/// the getGcnBufferFormatInfo() overloads declared later in this header, which
/// take either a \c Format value or a
/// (\c BitsPerComp, \c NumComponents, \c NumFormat) triple.
struct GcnBufferFormatInfo {
  /// Format value; matches the argument of the single-value lookup overload.
  unsigned Format;
  /// Presumably the bit width of one component; confirm against the tables.
  unsigned BitsPerComp;
  /// Presumably the number of components in the format.
  unsigned NumComponents;
  /// Presumably the numeric-format code of the format.
  unsigned NumFormat;
  /// NOTE(review): presumably the data-format code; confirm in the
  /// implementation file.
  unsigned DataFormat;
};
73
/// Per-opcode properties of an MAI instruction.
///
/// NOTE(review): presumably the row type of the table requested through
/// GET_MAIInstInfoTable_DECL and the data behind getMAIIsDGEMM() and
/// getMAIIsGFX940XDL(); confirm in the implementation file.
struct MAIInstInfo {
  /// Opcode this entry describes.
  uint16_t Opcode;
  /// Flag named after double precision GEMM operations.
  bool is_dgemm;
  /// Flag named after GFX940 XDL operations.
  bool is_gfx940_xdl;
};
79
80 #define GET_MIMGBaseOpcode_DECL
81 #define GET_MIMGDim_DECL
82 #define GET_MIMGEncoding_DECL
83 #define GET_MIMGLZMapping_DECL
84 #define GET_MIMGMIPMapping_DECL
85 #define GET_MIMGBiASMapping_DECL
86 #define GET_MAIInstInfoTable_DECL
87 #include "AMDGPUGenSearchableTables.inc"
88
89 namespace IsaInfo {
90
/// Named SGPR-count constants.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs set aside for the trap
  // handler; confirm against the users of this constant.
  TRAP_NUM_SGPRS = 16
};
97
/// State of one target ID feature. AMDGPUTargetID keeps one value of this
/// type for xnack and one for sramecc.
enum class TargetIDSetting {
  /// The feature is not supported; the is*Supported() queries of
  /// AMDGPUTargetID return false for this value.
  Unsupported,
  /// Counts as set for the is*OnOrAny() queries but not for is*OnOrOff().
  Any,
  /// The feature is explicitly off.
  Off,
  /// The feature is explicitly on.
  On
};
104
105 class AMDGPUTargetID {
106 private:
107 const MCSubtargetInfo &STI;
108 TargetIDSetting XnackSetting;
109 TargetIDSetting SramEccSetting;
110
111 public:
112 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
113 ~AMDGPUTargetID() = default;
114
115 /// \return True if the current xnack setting is not "Unsupported".
isXnackSupported()116 bool isXnackSupported() const {
117 return XnackSetting != TargetIDSetting::Unsupported;
118 }
119
120 /// \returns True if the current xnack setting is "On" or "Any".
isXnackOnOrAny()121 bool isXnackOnOrAny() const {
122 return XnackSetting == TargetIDSetting::On ||
123 XnackSetting == TargetIDSetting::Any;
124 }
125
126 /// \returns True if current xnack setting is "On" or "Off",
127 /// false otherwise.
isXnackOnOrOff()128 bool isXnackOnOrOff() const {
129 return getXnackSetting() == TargetIDSetting::On ||
130 getXnackSetting() == TargetIDSetting::Off;
131 }
132
133 /// \returns The current xnack TargetIDSetting, possible options are
134 /// "Unsupported", "Any", "Off", and "On".
getXnackSetting()135 TargetIDSetting getXnackSetting() const {
136 return XnackSetting;
137 }
138
139 /// Sets xnack setting to \p NewXnackSetting.
setXnackSetting(TargetIDSetting NewXnackSetting)140 void setXnackSetting(TargetIDSetting NewXnackSetting) {
141 XnackSetting = NewXnackSetting;
142 }
143
144 /// \return True if the current sramecc setting is not "Unsupported".
isSramEccSupported()145 bool isSramEccSupported() const {
146 return SramEccSetting != TargetIDSetting::Unsupported;
147 }
148
149 /// \returns True if the current sramecc setting is "On" or "Any".
isSramEccOnOrAny()150 bool isSramEccOnOrAny() const {
151 return SramEccSetting == TargetIDSetting::On ||
152 SramEccSetting == TargetIDSetting::Any;
153 }
154
155 /// \returns True if current sramecc setting is "On" or "Off",
156 /// false otherwise.
isSramEccOnOrOff()157 bool isSramEccOnOrOff() const {
158 return getSramEccSetting() == TargetIDSetting::On ||
159 getSramEccSetting() == TargetIDSetting::Off;
160 }
161
162 /// \returns The current sramecc TargetIDSetting, possible options are
163 /// "Unsupported", "Any", "Off", and "On".
getSramEccSetting()164 TargetIDSetting getSramEccSetting() const {
165 return SramEccSetting;
166 }
167
168 /// Sets sramecc setting to \p NewSramEccSetting.
setSramEccSetting(TargetIDSetting NewSramEccSetting)169 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
170 SramEccSetting = NewSramEccSetting;
171 }
172
173 void setTargetIDFromFeaturesString(StringRef FS);
174 void setTargetIDFromTargetIDStream(StringRef TargetID);
175
176 /// \returns String representation of an object.
177 std::string toString() const;
178 };
179
180 /// \returns Wavefront size for given subtarget \p STI.
181 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
182
183 /// \returns Local memory size in bytes for given subtarget \p STI.
184 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
185
186 /// \returns Number of execution units per compute unit for given subtarget \p
187 /// STI.
188 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
189
190 /// \returns Maximum number of work groups per compute unit for given subtarget
191 /// \p STI and limited by given \p FlatWorkGroupSize.
192 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
193 unsigned FlatWorkGroupSize);
194
195 /// \returns Minimum number of waves per execution unit for given subtarget \p
196 /// STI.
197 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
198
199 /// \returns Maximum number of waves per execution unit for given subtarget \p
200 /// STI without any kind of limitation.
201 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
202
203 /// \returns Number of waves per execution unit required to support the given \p
204 /// FlatWorkGroupSize.
205 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
206 unsigned FlatWorkGroupSize);
207
208 /// \returns Minimum flat work group size for given subtarget \p STI.
209 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
210
211 /// \returns Maximum flat work group size for given subtarget \p STI.
212 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
213
214 /// \returns Number of waves per work group for given subtarget \p STI and
215 /// \p FlatWorkGroupSize.
216 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
217 unsigned FlatWorkGroupSize);
218
219 /// \returns SGPR allocation granularity for given subtarget \p STI.
220 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
221
222 /// \returns SGPR encoding granularity for given subtarget \p STI.
223 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
224
225 /// \returns Total number of SGPRs for given subtarget \p STI.
226 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
227
228 /// \returns Addressable number of SGPRs for given subtarget \p STI.
229 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
230
231 /// \returns Minimum number of SGPRs that meets the given number of waves per
232 /// execution unit requirement for given subtarget \p STI.
233 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
234
235 /// \returns Maximum number of SGPRs that meets the given number of waves per
236 /// execution unit requirement for given subtarget \p STI.
237 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
238 bool Addressable);
239
240 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
241 /// STI when the given special registers are used.
242 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
243 bool FlatScrUsed, bool XNACKUsed);
244
245 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
246 /// STI when the given special registers are used. XNACK is inferred from
247 /// \p STI.
248 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
249 bool FlatScrUsed);
250
251 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
252 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
253 /// register counts.
254 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
255
256 /// \returns VGPR allocation granularity for given subtarget \p STI.
257 ///
258 /// For subtargets which support it, \p EnableWavefrontSize32 should match
259 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
260 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
261 Optional<bool> EnableWavefrontSize32 = None);
262
263 /// \returns VGPR encoding granularity for given subtarget \p STI.
264 ///
265 /// For subtargets which support it, \p EnableWavefrontSize32 should match
266 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
267 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
268 Optional<bool> EnableWavefrontSize32 = None);
269
270 /// \returns Total number of VGPRs for given subtarget \p STI.
271 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
272
273 /// \returns Addressable number of VGPRs for given subtarget \p STI.
274 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
275
276 /// \returns Minimum number of VGPRs that meets given number of waves per
277 /// execution unit requirement for given subtarget \p STI.
278 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
279
280 /// \returns Maximum number of VGPRs that meets given number of waves per
281 /// execution unit requirement for given subtarget \p STI.
282 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
283
284 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
285 /// \p NumVGPRs are used.
286 ///
287 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
288 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
289 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
290 Optional<bool> EnableWavefrontSize32 = None);
291
292 } // end namespace IsaInfo
293
294 LLVM_READONLY
295 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
296
297 LLVM_READONLY
298 int getSOPPWithRelaxation(uint16_t Opcode);
299
/// Properties of one MIMG base opcode. Pointers to entries are returned by
/// getMIMGBaseOpcode() and getMIMGBaseOpcodeInfo(), declared right after this
/// struct.
///
/// NOTE(review): the meaning of the individual flags is not visible in this
/// header; consult the table definitions before relying on them.
struct MIMGBaseOpcodeInfo {
  /// Base opcode this entry describes.
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};
317
318 LLVM_READONLY
319 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
320
321 LLVM_READONLY
322 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
323
/// Properties of one MIMG dimension. The getMIMGDimInfo*() functions declared
/// after this struct return pointers to entries, selected by a dimension enum
/// value, by an encoding, or by an assembly suffix.
struct MIMGDimInfo {
  /// Dimension this entry describes.
  MIMGDim Dim;
  /// Presumably the number of coordinate operands for this dimension.
  uint8_t NumCoords;
  /// Presumably the number of gradient operands for this dimension.
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  /// Presumably the value matched by getMIMGDimInfoByEncoding().
  uint8_t Encoding;
  /// Presumably the string matched by getMIMGDimInfoByAsmSuffix().
  const char *AsmSuffix;
};
333
334 LLVM_READONLY
335 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
336
337 LLVM_READONLY
338 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
339
340 LLVM_READONLY
341 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
342
/// Pairs two MIMG base opcodes. Pointers to entries are returned by
/// getMIMGLZMappingInfo(), whose argument is named after the \c L member.
///
/// NOTE(review): presumably \c LZ is the counterpart of \c L to use when the
/// LOD is zero; confirm against the users of this mapping.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
347
/// Pairs two MIMG base opcodes. Pointers to entries are returned by
/// getMIMGMIPMappingInfo(), whose argument is named after the \c MIP member.
///
/// NOTE(review): presumably \c NONMIP is the variant of \c MIP without a mip
/// operand; confirm against the users of this mapping.
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
352
/// Pairs two MIMG base opcodes. Pointers to entries are returned by
/// getMIMGBiasMappingInfo(), whose argument is named after the \c Bias member.
///
/// NOTE(review): presumably \c NoBias is the variant of \c Bias without a
/// bias operand; confirm against the users of this mapping.
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
357
/// Pairs two MIMG base opcodes. Pointers to entries are returned by
/// getMIMGOffsetMappingInfo(), whose argument is named after the \c Offset
/// member.
///
/// NOTE(review): presumably \c NoOffset is the variant of \c Offset without
/// an offset operand; confirm against the users of this mapping.
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
362
/// Pairs two MIMG base opcodes. Pointers to entries are returned by
/// getMIMGG16MappingInfo(), whose argument is named after the \c G member.
///
/// NOTE(review): presumably \c G16 is the 16-bit-gradient counterpart of
/// \c G; confirm against the users of this mapping.
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
367
368 LLVM_READONLY
369 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
370
/// Pairs two opcodes of a WMMA instruction.
///
/// NOTE(review): presumably the data behind mapWMMA2AddrTo3AddrOpcode() and
/// mapWMMA3AddrTo2AddrOpcode(), with the members holding the two-address and
/// three-address forms; confirm in the implementation file.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
375
376 LLVM_READONLY
377 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
378
379 LLVM_READONLY
380 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
381
382 LLVM_READONLY
383 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
384
385 LLVM_READONLY
386 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
387
388 LLVM_READONLY
389 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
390 unsigned VDataDwords, unsigned VAddrDwords);
391
392 LLVM_READONLY
393 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
394
395 LLVM_READONLY
396 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
397 const MIMGDimInfo *Dim, bool IsA16,
398 bool IsG16Supported);
399
/// Per-opcode MIMG data. getMIMGInfo(), declared after this struct, returns a
/// pointer to an entry for a given opcode. The \c BaseOpcode,
/// \c MIMGEncoding, \c VDataDwords and \c VAddrDwords members carry the same
/// names as the parameters of getMIMGOpcode().
struct MIMGInfo {
  /// Opcode this entry describes.
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  /// Presumably the size of the data operand in dwords.
  uint8_t VDataDwords;
  /// Presumably the size of the address operands in dwords.
  uint8_t VAddrDwords;
  /// Presumably the number of address operands.
  uint8_t VAddrOperands;
};
408
409 LLVM_READONLY
410 const MIMGInfo *getMIMGInfo(unsigned Opc);
411
412 LLVM_READONLY
413 int getMTBUFBaseOpcode(unsigned Opc);
414
415 LLVM_READONLY
416 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
417
418 LLVM_READONLY
419 int getMTBUFElements(unsigned Opc);
420
421 LLVM_READONLY
422 bool getMTBUFHasVAddr(unsigned Opc);
423
424 LLVM_READONLY
425 bool getMTBUFHasSrsrc(unsigned Opc);
426
427 LLVM_READONLY
428 bool getMTBUFHasSoffset(unsigned Opc);
429
430 LLVM_READONLY
431 int getMUBUFBaseOpcode(unsigned Opc);
432
433 LLVM_READONLY
434 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
435
436 LLVM_READONLY
437 int getMUBUFElements(unsigned Opc);
438
439 LLVM_READONLY
440 bool getMUBUFHasVAddr(unsigned Opc);
441
442 LLVM_READONLY
443 bool getMUBUFHasSrsrc(unsigned Opc);
444
445 LLVM_READONLY
446 bool getMUBUFHasSoffset(unsigned Opc);
447
448 LLVM_READONLY
449 bool getMUBUFIsBufferInv(unsigned Opc);
450
451 LLVM_READONLY
452 bool getSMEMIsBuffer(unsigned Opc);
453
454 LLVM_READONLY
455 bool getVOP1IsSingle(unsigned Opc);
456
457 LLVM_READONLY
458 bool getVOP2IsSingle(unsigned Opc);
459
460 LLVM_READONLY
461 bool getVOP3IsSingle(unsigned Opc);
462
463 LLVM_READONLY
464 bool isVOPC64DPP(unsigned Opc);
465
466 /// Returns true if MAI operation is a double precision GEMM.
467 LLVM_READONLY
468 bool getMAIIsDGEMM(unsigned Opc);
469
470 LLVM_READONLY
471 bool getMAIIsGFX940XDL(unsigned Opc);
472
/// Result type of getCanBeVOPD().
///
/// NOTE(review): presumably \c X and \c Y state whether an opcode may be used
/// as the X or the Y component of a VOPD instruction; confirm in the
/// implementation file.
struct CanBeVOPD {
  bool X;
  bool Y;
};
477
478 LLVM_READONLY
479 CanBeVOPD getCanBeVOPD(unsigned Opc);
480
481 LLVM_READONLY
482 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
483 uint8_t NumComponents,
484 uint8_t NumFormat,
485 const MCSubtargetInfo &STI);
486 LLVM_READONLY
487 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
488 const MCSubtargetInfo &STI);
489
490 LLVM_READONLY
491 int getMCOpcode(uint16_t Opcode, unsigned Gen);
492
493 LLVM_READONLY
494 unsigned getVOPDOpcode(unsigned Opc);
495
496 LLVM_READONLY
497 int getVOPDFull(unsigned OpX, unsigned OpY);
498
499 LLVM_READONLY
500 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
501
502 LLVM_READONLY
503 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
504
505 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
506 const MCSubtargetInfo *STI);
507
508 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
509 const MCSubtargetInfo *STI);
510
511 bool isGroupSegment(const GlobalValue *GV);
512 bool isGlobalSegment(const GlobalValue *GV);
513 bool isReadOnlySegment(const GlobalValue *GV);
514
515 /// \returns True if constants should be emitted to .text section for given
516 /// target triple \p TT, false otherwise.
517 bool shouldEmitConstantsToTextSection(const Triple &TT);
518
519 /// \returns Integer value requested using \p F's \p Name attribute.
520 ///
521 /// \returns \p Default if attribute is not present.
522 ///
523 /// \returns \p Default and emits error if requested value cannot be converted
524 /// to integer.
525 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
526
527 /// \returns A pair of integer values requested using \p F's \p Name attribute
528 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
529 /// is false).
530 ///
531 /// \returns \p Default if attribute is not present.
532 ///
533 /// \returns \p Default and emits error if one of the requested values cannot be
534 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
535 /// not present.
536 std::pair<int, int> getIntegerPairAttribute(const Function &F,
537 StringRef Name,
538 std::pair<int, int> Default,
539 bool OnlyFirstRequired = false);
540
/// Counter values to wait for in an s_waitcnt instruction.
///
/// A large value (up to and including the maximum unsigned value) in a
/// counter stands for a "don't care" wait. The queries below treat only the
/// exact value ~0u as "no wait".
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt set to zero. VsCnt is
  /// zero as well when \p HasVscnt is true and left at ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt Result = allZeroExceptVsCnt();
    if (HasVscnt)
      Result.VsCnt = 0;
    return Result;
  }

  /// \returns A wait with VmCnt, ExpCnt and LgkmCnt set to zero and VsCnt
  /// left at ~0u.
  static Waitcnt allZeroExceptVsCnt() {
    Waitcnt Result;
    Result.VmCnt = 0;
    Result.ExpCnt = 0;
    Result.LgkmCnt = 0;
    return Result;
  }

  /// \returns True if any of the four counters differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  /// \returns True if VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    const unsigned NoWait = ~0u;
    return !(VmCnt == NoWait && ExpCnt == NoWait && LgkmCnt == NoWait);
  }

  /// \returns True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return !(VsCnt == ~0u); }

  /// \returns True if every counter of this wait is less than or equal to
  /// the corresponding counter of \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt)
      return false;
    if (ExpCnt > Other.ExpCnt)
      return false;
    if (LgkmCnt > Other.LgkmCnt)
      return false;
    return VsCnt <= Other.VsCnt;
  }

  /// \returns A wait whose counters are the per-counter minimum of this wait
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Merged(*this);
    Merged.VmCnt = std::min(Merged.VmCnt, Other.VmCnt);
    Merged.ExpCnt = std::min(Merged.ExpCnt, Other.ExpCnt);
    Merged.LgkmCnt = std::min(Merged.LgkmCnt, Other.LgkmCnt);
    Merged.VsCnt = std::min(Merged.VsCnt, Other.VsCnt);
    return Merged;
  }
};
583
584 /// \returns Vmcnt bit mask for given isa \p Version.
585 unsigned getVmcntBitMask(const IsaVersion &Version);
586
587 /// \returns Expcnt bit mask for given isa \p Version.
588 unsigned getExpcntBitMask(const IsaVersion &Version);
589
590 /// \returns Lgkmcnt bit mask for given isa \p Version.
591 unsigned getLgkmcntBitMask(const IsaVersion &Version);
592
593 /// \returns Waitcnt bit mask for given isa \p Version.
594 unsigned getWaitcntBitMask(const IsaVersion &Version);
595
596 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
597 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
598
599 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
600 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
601
602 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
603 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
604
605 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
606 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
607 /// \p Lgkmcnt respectively.
608 ///
609 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
610 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
611 /// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
612 /// \p Vmcnt = \p Waitcnt[15:10] (gfx11+)
613 /// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
614 /// \p Expcnt = \p Waitcnt[2:0] (gfx11+)
615 /// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
616 /// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
617 /// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11+)
618 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
619 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
620
621 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
622
623 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
624 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
625 unsigned Vmcnt);
626
627 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
628 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
629 unsigned Expcnt);
630
631 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
632 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
633 unsigned Lgkmcnt);
634
635 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
636 /// \p Version.
637 ///
638 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
639 /// Waitcnt[2:0] = \p Expcnt (gfx11+)
640 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
641 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
642 /// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
643 /// Waitcnt[9:4] = \p Lgkmcnt (gfx11+)
644 /// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
645 /// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
646 /// Waitcnt[15:10] = \p Vmcnt (gfx11+)
647 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
648 ///
649 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
650 /// isa \p Version.
651 unsigned encodeWaitcnt(const IsaVersion &Version,
652 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
653
654 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
655
656 namespace Hwreg {
657
658 LLVM_READONLY
659 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);
660
661 LLVM_READNONE
662 bool isValidHwreg(int64_t Id);
663
664 LLVM_READNONE
665 bool isValidHwregOffset(int64_t Offset);
666
667 LLVM_READNONE
668 bool isValidHwregWidth(int64_t Width);
669
670 LLVM_READNONE
671 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
672
673 LLVM_READNONE
674 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
675
676 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
677
678 } // namespace Hwreg
679
680 namespace DepCtr {
681
682 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);
683 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
684 const MCSubtargetInfo &STI);
685 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
686 const MCSubtargetInfo &STI);
687 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
688 bool &IsDefault, const MCSubtargetInfo &STI);
689
690 } // namespace DepCtr
691
692 namespace Exp {
693
694 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
695
696 LLVM_READONLY
697 unsigned getTgtId(const StringRef Name);
698
699 LLVM_READNONE
700 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
701
702 } // namespace Exp
703
704 namespace MTBUFFormat {
705
706 LLVM_READNONE
707 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
708
709 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
710
711 int64_t getDfmt(const StringRef Name);
712
713 StringRef getDfmtName(unsigned Id);
714
715 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
716
717 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
718
719 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
720
721 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
722
723 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
724
725 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
726
727 bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
728
729 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
730 const MCSubtargetInfo &STI);
731
732 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
733
734 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
735
736 } // namespace MTBUFFormat
737
738 namespace SendMsg {
739
740 LLVM_READONLY
741 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);
742
743 LLVM_READONLY
744 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
745
746 LLVM_READNONE
747 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);
748
749 LLVM_READNONE
750 StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
751
752 LLVM_READNONE
753 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
754
755 LLVM_READNONE
756 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
757 bool Strict = true);
758
759 LLVM_READNONE
760 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
761 const MCSubtargetInfo &STI, bool Strict = true);
762
763 LLVM_READNONE
764 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
765
766 LLVM_READNONE
767 bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
768
769 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
770 uint16_t &StreamId, const MCSubtargetInfo &STI);
771
772 LLVM_READNONE
773 uint64_t encodeMsg(uint64_t MsgId,
774 uint64_t OpId,
775 uint64_t StreamId);
776
777 } // namespace SendMsg
778
779
780 unsigned getInitialPSInputAddr(const Function &F);
781
782 bool getHasColorExport(const Function &F);
783
784 bool getHasDepthExport(const Function &F);
785
786 LLVM_READNONE
787 bool isShader(CallingConv::ID CC);
788
789 LLVM_READNONE
790 bool isGraphics(CallingConv::ID CC);
791
792 LLVM_READNONE
793 bool isCompute(CallingConv::ID CC);
794
795 LLVM_READNONE
796 bool isEntryFunctionCC(CallingConv::ID CC);
797
798 // These functions are considered entrypoints into the current module, i.e. they
799 // are allowed to be called from outside the current module. This is different
800 // from isEntryFunctionCC, which is only true for functions that are entered by
801 // the hardware. Module entry points include all entry functions but also
802 // include functions that can be called from other functions inside or outside
803 // the current module. Module entry functions are allowed to allocate LDS.
804 LLVM_READNONE
805 bool isModuleEntryFunctionCC(CallingConv::ID CC);
806
807 bool isKernelCC(const Function *Func);
808
809 // FIXME: Remove this when calling conventions cleaned up
810 LLVM_READNONE
isKernel(CallingConv::ID CC)811 inline bool isKernel(CallingConv::ID CC) {
812 switch (CC) {
813 case CallingConv::AMDGPU_KERNEL:
814 case CallingConv::SPIR_KERNEL:
815 return true;
816 default:
817 return false;
818 }
819 }
820
821 bool hasXNACK(const MCSubtargetInfo &STI);
822 bool hasSRAMECC(const MCSubtargetInfo &STI);
823 bool hasMIMG_R128(const MCSubtargetInfo &STI);
824 bool hasGFX10A16(const MCSubtargetInfo &STI);
825 bool hasG16(const MCSubtargetInfo &STI);
826 bool hasPackedD16(const MCSubtargetInfo &STI);
827
828 bool isSI(const MCSubtargetInfo &STI);
829 bool isCI(const MCSubtargetInfo &STI);
830 bool isVI(const MCSubtargetInfo &STI);
831 bool isGFX9(const MCSubtargetInfo &STI);
832 bool isGFX9_GFX10(const MCSubtargetInfo &STI);
833 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
834 bool isGFX8Plus(const MCSubtargetInfo &STI);
835 bool isGFX9Plus(const MCSubtargetInfo &STI);
836 bool isGFX10(const MCSubtargetInfo &STI);
837 bool isGFX10Plus(const MCSubtargetInfo &STI);
838 bool isNotGFX10Plus(const MCSubtargetInfo &STI);
839 bool isGFX10Before1030(const MCSubtargetInfo &STI);
840 bool isGFX11(const MCSubtargetInfo &STI);
841 bool isGFX11Plus(const MCSubtargetInfo &STI);
842 bool isNotGFX11Plus(const MCSubtargetInfo &STI);
843 bool isGCN3Encoding(const MCSubtargetInfo &STI);
844 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
845 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
846 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
847 bool isGFX90A(const MCSubtargetInfo &STI);
848 bool isGFX940(const MCSubtargetInfo &STI);
849 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
850 bool hasMAIInsts(const MCSubtargetInfo &STI);
851 bool hasVOPD(const MCSubtargetInfo &STI);
852 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
853
/// Is \p Reg a scalar register?
855 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
856
857 /// If \p Reg is a pseudo reg, return the correct hardware register given
858 /// \p STI otherwise return \p Reg.
859 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
860
861 /// Convert hardware register \p Reg to a pseudo register
862 LLVM_READNONE
863 unsigned mc2PseudoReg(unsigned Reg);
864
865 /// Can this operand also contain immediate values?
866 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
867
/// Is this a floating-point operand?
869 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
870
871 /// Does this operand support only inlinable literals?
872 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
873
/// Get the size in bits of a register from the register class with ID
/// \p RCID.
875 unsigned getRegBitWidth(unsigned RCID);
876
877 /// Get the size in bits of a register from the register class \p RC.
878 unsigned getRegBitWidth(const MCRegisterClass &RC);
879
880 /// Get size of register operand
881 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
882 unsigned OpNo);
883
884 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)885 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
886 switch (OpInfo.OperandType) {
887 case AMDGPU::OPERAND_REG_IMM_INT32:
888 case AMDGPU::OPERAND_REG_IMM_FP32:
889 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
890 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
891 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
892 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
893 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
894 case AMDGPU::OPERAND_REG_IMM_V2INT32:
895 case AMDGPU::OPERAND_REG_IMM_V2FP32:
896 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
897 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
898 case AMDGPU::OPERAND_KIMM32:
899 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
900 return 4;
901
902 case AMDGPU::OPERAND_REG_IMM_INT64:
903 case AMDGPU::OPERAND_REG_IMM_FP64:
904 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
905 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
906 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
907 return 8;
908
909 case AMDGPU::OPERAND_REG_IMM_INT16:
910 case AMDGPU::OPERAND_REG_IMM_FP16:
911 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
912 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
913 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
914 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
915 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
916 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
917 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
918 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
919 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
920 case AMDGPU::OPERAND_REG_IMM_V2INT16:
921 case AMDGPU::OPERAND_REG_IMM_V2FP16:
922 return 2;
923
924 default:
925 llvm_unreachable("unhandled operand type");
926 }
927 }
928
929 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)930 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
931 return getOperandSize(Desc.OpInfo[OpNo]);
932 }
933
934 /// Is this literal inlinable, and not one of the values intended for floating
935 /// point values.
936 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // Accepted values form the closed range [-16, 64].
  const int64_t Lowest = -16;
  const int64_t Highest = 64;
  return !(Literal < Lowest || Literal > Highest);
}
940
941 /// Is this literal inlinable
942 LLVM_READNONE
943 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
944
945 LLVM_READNONE
946 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
947
948 LLVM_READNONE
949 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
950
951 LLVM_READNONE
952 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
953
954 LLVM_READNONE
955 bool isInlinableIntLiteralV216(int32_t Literal);
956
957 LLVM_READNONE
958 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
959
960 bool isArgPassedInSGPR(const Argument *Arg);
961
962 LLVM_READONLY
963 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
964 int64_t EncodedOffset);
965
966 LLVM_READONLY
967 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
968 int64_t EncodedOffset,
969 bool IsBuffer);
970
971 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
972 /// offsets.
973 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
974
975 /// \returns The encoding that will be used for \p ByteOffset in the
976 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
977 /// S_LOAD instructions have a signed offset, on other subtargets it is
978 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
979 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
980 int64_t ByteOffset, bool IsBuffer);
981
/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
984 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
985 int64_t ByteOffset);
986
987 /// For FLAT segment the offset must be positive;
988 /// MSB is ignored and forced to zero.
989 ///
990 /// \return The number of bits available for the offset field in flat
991 /// instructions.
992 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
993
994 /// \returns true if this offset is small enough to fit in the SMRD
995 /// offset field. \p ByteOffset should be the offset in bytes and
996 /// not the encoded offset.
997 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
998
999 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1000 const GCNSubtarget *Subtarget,
1001 Align Alignment = Align(4));
1002
1003 LLVM_READNONE
isLegal64BitDPPControl(unsigned DC)1004 inline bool isLegal64BitDPPControl(unsigned DC) {
1005 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1006 }
1007
1008 /// \returns true if the intrinsic is divergent
1009 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1010
1011 // Track defaults for fields in the MODE register.
1012 struct SIModeRegisterDefaults {
1013 /// Floating point opcodes that support exception flag gathering quiet and
1014 /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
1015 /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
1016 /// quieting.
1017 bool IEEE : 1;
1018
1019 /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
1020 /// clamp NaN to zero; otherwise, pass NaN through.
1021 bool DX10Clamp : 1;
1022
1023 /// If this is set, neither input or output denormals are flushed for most f32
1024 /// instructions.
1025 bool FP32InputDenormals : 1;
1026 bool FP32OutputDenormals : 1;
1027
1028 /// If this is set, neither input or output denormals are flushed for both f64
1029 /// and f16/v2f16 instructions.
1030 bool FP64FP16InputDenormals : 1;
1031 bool FP64FP16OutputDenormals : 1;
1032
SIModeRegisterDefaultsSIModeRegisterDefaults1033 SIModeRegisterDefaults() :
1034 IEEE(true),
1035 DX10Clamp(true),
1036 FP32InputDenormals(true),
1037 FP32OutputDenormals(true),
1038 FP64FP16InputDenormals(true),
1039 FP64FP16OutputDenormals(true) {}
1040
1041 SIModeRegisterDefaults(const Function &F);
1042
getDefaultForCallingConvSIModeRegisterDefaults1043 static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
1044 SIModeRegisterDefaults Mode;
1045 Mode.IEEE = !AMDGPU::isShader(CC);
1046 return Mode;
1047 }
1048
1049 bool operator ==(const SIModeRegisterDefaults Other) const {
1050 return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
1051 FP32InputDenormals == Other.FP32InputDenormals &&
1052 FP32OutputDenormals == Other.FP32OutputDenormals &&
1053 FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
1054 FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
1055 }
1056
allFP32DenormalsSIModeRegisterDefaults1057 bool allFP32Denormals() const {
1058 return FP32InputDenormals && FP32OutputDenormals;
1059 }
1060
allFP64FP16DenormalsSIModeRegisterDefaults1061 bool allFP64FP16Denormals() const {
1062 return FP64FP16InputDenormals && FP64FP16OutputDenormals;
1063 }
1064
1065 /// Get the encoding value for the FP_DENORM bits of the mode register for the
1066 /// FP32 denormal mode.
fpDenormModeSPValueSIModeRegisterDefaults1067 uint32_t fpDenormModeSPValue() const {
1068 if (FP32InputDenormals && FP32OutputDenormals)
1069 return FP_DENORM_FLUSH_NONE;
1070 if (FP32InputDenormals)
1071 return FP_DENORM_FLUSH_OUT;
1072 if (FP32OutputDenormals)
1073 return FP_DENORM_FLUSH_IN;
1074 return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1075 }
1076
1077 /// Get the encoding value for the FP_DENORM bits of the mode register for the
1078 /// FP64/FP16 denormal mode.
fpDenormModeDPValueSIModeRegisterDefaults1079 uint32_t fpDenormModeDPValue() const {
1080 if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
1081 return FP_DENORM_FLUSH_NONE;
1082 if (FP64FP16InputDenormals)
1083 return FP_DENORM_FLUSH_OUT;
1084 if (FP64FP16OutputDenormals)
1085 return FP_DENORM_FLUSH_IN;
1086 return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1087 }
1088
1089 /// Returns true if a flag is compatible if it's enabled in the callee, but
1090 /// disabled in the caller.
oneWayCompatibleSIModeRegisterDefaults1091 static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1092 return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1093 }
1094
1095 // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1096 // be able to override.
isInlineCompatibleSIModeRegisterDefaults1097 bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1098 if (DX10Clamp != CalleeMode.DX10Clamp)
1099 return false;
1100 if (IEEE != CalleeMode.IEEE)
1101 return false;
1102
1103 // Allow inlining denormals enabled into denormals flushed functions.
1104 return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1105 oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1106 oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1107 oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1108 }
1109 };
1110
1111 } // end namespace AMDGPU
1112
1113 raw_ostream &operator<<(raw_ostream &OS,
1114 const AMDGPU::IsaInfo::TargetIDSetting S);
1115
1116 } // end namespace llvm
1117
1118 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1119