1fe013be4SDimitry Andric //===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
2fe013be4SDimitry Andric //
3fe013be4SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe013be4SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe013be4SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe013be4SDimitry Andric //
7fe013be4SDimitry Andric //===----------------------------------------------------------------------===//
8fe013be4SDimitry Andric
9fe013be4SDimitry Andric #include "SIModeRegisterDefaults.h"
10*c9157d92SDimitry Andric #include "GCNSubtarget.h"
11fe013be4SDimitry Andric
12fe013be4SDimitry Andric using namespace llvm;
13fe013be4SDimitry Andric
SIModeRegisterDefaults(const Function & F,const GCNSubtarget & ST)14*c9157d92SDimitry Andric SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F,
15*c9157d92SDimitry Andric const GCNSubtarget &ST) {
16fe013be4SDimitry Andric *this = getDefaultForCallingConv(F.getCallingConv());
17fe013be4SDimitry Andric
18*c9157d92SDimitry Andric if (ST.hasIEEEMode()) {
19fe013be4SDimitry Andric StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
20fe013be4SDimitry Andric if (!IEEEAttr.empty())
21fe013be4SDimitry Andric IEEE = IEEEAttr == "true";
22*c9157d92SDimitry Andric }
23fe013be4SDimitry Andric
24*c9157d92SDimitry Andric if (ST.hasDX10ClampMode()) {
25fe013be4SDimitry Andric StringRef DX10ClampAttr =
26fe013be4SDimitry Andric F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
27fe013be4SDimitry Andric if (!DX10ClampAttr.empty())
28fe013be4SDimitry Andric DX10Clamp = DX10ClampAttr == "true";
29*c9157d92SDimitry Andric }
30fe013be4SDimitry Andric
31fe013be4SDimitry Andric StringRef DenormF32Attr =
32fe013be4SDimitry Andric F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
33fe013be4SDimitry Andric if (!DenormF32Attr.empty())
34fe013be4SDimitry Andric FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
35fe013be4SDimitry Andric
36fe013be4SDimitry Andric StringRef DenormAttr =
37fe013be4SDimitry Andric F.getFnAttribute("denormal-fp-math").getValueAsString();
38fe013be4SDimitry Andric if (!DenormAttr.empty()) {
39fe013be4SDimitry Andric DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
40fe013be4SDimitry Andric if (DenormF32Attr.empty())
41fe013be4SDimitry Andric FP32Denormals = DenormMode;
42fe013be4SDimitry Andric FP64FP16Denormals = DenormMode;
43fe013be4SDimitry Andric }
44fe013be4SDimitry Andric }
45*c9157d92SDimitry Andric
46*c9157d92SDimitry Andric using namespace AMDGPU;
47*c9157d92SDimitry Andric
48*c9157d92SDimitry Andric /// Combine f32 and f64 rounding modes into a combined rounding mode value.
getModeRegisterRoundMode(uint32_t HWFP32Val,uint32_t HWFP64Val)49*c9157d92SDimitry Andric static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val,
50*c9157d92SDimitry Andric uint32_t HWFP64Val) {
51*c9157d92SDimitry Andric return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset;
52*c9157d92SDimitry Andric }
53*c9157d92SDimitry Andric
encodeFltRoundsTable(uint32_t FltRoundsVal,uint32_t HWF32Val,uint32_t HWF64Val)54*c9157d92SDimitry Andric static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal,
55*c9157d92SDimitry Andric uint32_t HWF32Val,
56*c9157d92SDimitry Andric uint32_t HWF64Val) {
57*c9157d92SDimitry Andric uint32_t ModeVal = getModeRegisterRoundMode(HWF32Val, HWF64Val);
58*c9157d92SDimitry Andric if (FltRoundsVal > TowardNegative)
59*c9157d92SDimitry Andric FltRoundsVal -= ExtendedFltRoundOffset;
60*c9157d92SDimitry Andric
61*c9157d92SDimitry Andric uint32_t BitIndex = ModeVal << 2;
62*c9157d92SDimitry Andric return static_cast<uint64_t>(FltRoundsVal) << BitIndex;
63*c9157d92SDimitry Andric }
64*c9157d92SDimitry Andric
65*c9157d92SDimitry Andric // Encode FLT_ROUNDS value where the two rounding modes are the same and use a
66*c9157d92SDimitry Andric // standard value
67*c9157d92SDimitry Andric static constexpr uint64_t
encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode,uint32_t HWVal)68*c9157d92SDimitry Andric encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) {
69*c9157d92SDimitry Andric return encodeFltRoundsTable(FltRoundsMode, HWVal, HWVal);
70*c9157d92SDimitry Andric }
71*c9157d92SDimitry Andric
72*c9157d92SDimitry Andric // Convert mode register encoded rounding mode to AMDGPUFltRounds
73*c9157d92SDimitry Andric static constexpr AMDGPUFltRounds
decodeIndexFltRoundConversionTable(uint32_t HWMode)74*c9157d92SDimitry Andric decodeIndexFltRoundConversionTable(uint32_t HWMode) {
75*c9157d92SDimitry Andric uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf;
76*c9157d92SDimitry Andric if (TableRead > TowardNegative)
77*c9157d92SDimitry Andric TableRead += ExtendedFltRoundOffset;
78*c9157d92SDimitry Andric return static_cast<AMDGPUFltRounds>(TableRead);
79*c9157d92SDimitry Andric }
80*c9157d92SDimitry Andric
81*c9157d92SDimitry Andric static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO;
82*c9157d92SDimitry Andric static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST;
83*c9157d92SDimitry Andric static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF;
84*c9157d92SDimitry Andric static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF;
85*c9157d92SDimitry Andric
86*c9157d92SDimitry Andric const uint64_t AMDGPU::FltRoundConversionTable =
87*c9157d92SDimitry Andric encodeFltRoundsTableSame(TowardZeroF32_TowardZeroF64, HWTowardZero) |
88*c9157d92SDimitry Andric encodeFltRoundsTableSame(NearestTiesToEvenF32_NearestTiesToEvenF64,
89*c9157d92SDimitry Andric HWNearestTiesToEven) |
90*c9157d92SDimitry Andric encodeFltRoundsTableSame(TowardPositiveF32_TowardPositiveF64,
91*c9157d92SDimitry Andric HWTowardPositive) |
92*c9157d92SDimitry Andric encodeFltRoundsTableSame(TowardNegativeF32_TowardNegativeF64,
93*c9157d92SDimitry Andric HWTowardNegative) |
94*c9157d92SDimitry Andric
95*c9157d92SDimitry Andric encodeFltRoundsTable(TowardZeroF32_NearestTiesToEvenF64, HWTowardZero,
96*c9157d92SDimitry Andric HWNearestTiesToEven) |
97*c9157d92SDimitry Andric encodeFltRoundsTable(TowardZeroF32_TowardPositiveF64, HWTowardZero,
98*c9157d92SDimitry Andric HWTowardPositive) |
99*c9157d92SDimitry Andric encodeFltRoundsTable(TowardZeroF32_TowardNegativeF64, HWTowardZero,
100*c9157d92SDimitry Andric HWTowardNegative) |
101*c9157d92SDimitry Andric
102*c9157d92SDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardZeroF64,
103*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardZero) |
104*c9157d92SDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardPositiveF64,
105*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardPositive) |
106*c9157d92SDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardNegativeF64,
107*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardNegative) |
108*c9157d92SDimitry Andric
109*c9157d92SDimitry Andric encodeFltRoundsTable(TowardPositiveF32_TowardZeroF64, HWTowardPositive,
110*c9157d92SDimitry Andric HWTowardZero) |
111*c9157d92SDimitry Andric encodeFltRoundsTable(TowardPositiveF32_NearestTiesToEvenF64,
112*c9157d92SDimitry Andric HWTowardPositive, HWNearestTiesToEven) |
113*c9157d92SDimitry Andric encodeFltRoundsTable(TowardPositiveF32_TowardNegativeF64, HWTowardPositive,
114*c9157d92SDimitry Andric HWTowardNegative) |
115*c9157d92SDimitry Andric
116*c9157d92SDimitry Andric encodeFltRoundsTable(TowardNegativeF32_TowardZeroF64, HWTowardNegative,
117*c9157d92SDimitry Andric HWTowardZero) |
118*c9157d92SDimitry Andric encodeFltRoundsTable(TowardNegativeF32_NearestTiesToEvenF64,
119*c9157d92SDimitry Andric HWTowardNegative, HWNearestTiesToEven) |
120*c9157d92SDimitry Andric encodeFltRoundsTable(TowardNegativeF32_TowardPositiveF64, HWTowardNegative,
121*c9157d92SDimitry Andric HWTowardPositive);
122*c9157d92SDimitry Andric
123*c9157d92SDimitry Andric // Verify evaluation of FltRoundConversionTable
124*c9157d92SDimitry Andric
125*c9157d92SDimitry Andric // If both modes are the same, should return the standard values.
126*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
127*c9157d92SDimitry Andric HWTowardZero, HWTowardZero)) == AMDGPUFltRounds::TowardZero);
128*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
129*c9157d92SDimitry Andric HWNearestTiesToEven, HWNearestTiesToEven)) ==
130*c9157d92SDimitry Andric AMDGPUFltRounds::NearestTiesToEven);
131*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
132*c9157d92SDimitry Andric HWTowardPositive, HWTowardPositive)) ==
133*c9157d92SDimitry Andric AMDGPUFltRounds::TowardPositive);
134*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
135*c9157d92SDimitry Andric HWTowardNegative, HWTowardNegative)) ==
136*c9157d92SDimitry Andric AMDGPUFltRounds::TowardNegative);
137*c9157d92SDimitry Andric
138*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
139*c9157d92SDimitry Andric HWTowardZero, HWNearestTiesToEven)) ==
140*c9157d92SDimitry Andric TowardZeroF32_NearestTiesToEvenF64);
141*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(
142*c9157d92SDimitry Andric getModeRegisterRoundMode(HWTowardZero, HWTowardPositive)) ==
143*c9157d92SDimitry Andric TowardZeroF32_TowardPositiveF64);
144*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(
145*c9157d92SDimitry Andric getModeRegisterRoundMode(HWTowardZero, HWTowardNegative)) ==
146*c9157d92SDimitry Andric TowardZeroF32_TowardNegativeF64);
147*c9157d92SDimitry Andric
148*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
149*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardZero)) ==
150*c9157d92SDimitry Andric NearestTiesToEvenF32_TowardZeroF64);
151*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
152*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardPositive)) ==
153*c9157d92SDimitry Andric NearestTiesToEvenF32_TowardPositiveF64);
154*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
155*c9157d92SDimitry Andric HWNearestTiesToEven, HWTowardNegative)) ==
156*c9157d92SDimitry Andric NearestTiesToEvenF32_TowardNegativeF64);
157*c9157d92SDimitry Andric
158*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(
159*c9157d92SDimitry Andric getModeRegisterRoundMode(HWTowardPositive, HWTowardZero)) ==
160*c9157d92SDimitry Andric TowardPositiveF32_TowardZeroF64);
161*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
162*c9157d92SDimitry Andric HWTowardPositive, HWNearestTiesToEven)) ==
163*c9157d92SDimitry Andric TowardPositiveF32_NearestTiesToEvenF64);
164*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
165*c9157d92SDimitry Andric HWTowardPositive, HWTowardNegative)) ==
166*c9157d92SDimitry Andric TowardPositiveF32_TowardNegativeF64);
167*c9157d92SDimitry Andric
168*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(
169*c9157d92SDimitry Andric getModeRegisterRoundMode(HWTowardNegative, HWTowardZero)) ==
170*c9157d92SDimitry Andric TowardNegativeF32_TowardZeroF64);
171*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
172*c9157d92SDimitry Andric HWTowardNegative, HWNearestTiesToEven)) ==
173*c9157d92SDimitry Andric TowardNegativeF32_NearestTiesToEvenF64);
174*c9157d92SDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
175*c9157d92SDimitry Andric HWTowardNegative, HWTowardPositive)) ==
176*c9157d92SDimitry Andric TowardNegativeF32_TowardPositiveF64);
177