1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
5
; Scalar f32 constrained fsub with round.tonearest and fpexcept.strict,
; operands passed as ordinary (VGPR) function arguments.
; Every check prefix expects a single v_sub_f32_e32 v0, v0, v1; the GFX10PLUS
; checks (shared by the gfx1010 and gfx1100 runs) additionally expect an
; s_waitcnt_vscnt in the entry wait sequence.
6define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
7; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
11; GCN-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
14; GFX10PLUS:       ; %bb.0:
15; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
17; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
18; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
19  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
20  ret float %val
21}
22
; Scalar f32 constrained fsub with round.tonearest and fpexcept.ignore.
; The expected instruction sequence is one v_sub_f32_e32 v0, v0, v1 for both
; the GCN and GFX10PLUS check prefixes.
23define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
24; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
25; GCN:       ; %bb.0:
26; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
28; GCN-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
31; GFX10PLUS:       ; %bb.0:
32; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
34; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
35; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
36  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
37  ret float %val
38}
39
; Scalar f32 constrained fsub with round.tonearest and fpexcept.maytrap.
; The expected instruction sequence is one v_sub_f32_e32 v0, v0, v1 for both
; the GCN and GFX10PLUS check prefixes.
40define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
41; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
42; GCN:       ; %bb.0:
43; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
45; GCN-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
48; GFX10PLUS:       ; %bb.0:
49; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
51; GFX10PLUS-NEXT:    v_sub_f32_e32 v0, v0, v1
52; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
53  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
54  ret float %val
55}
56
; <2 x float> constrained fsub with round.tonearest and fpexcept.strict.
; The GCN and GFX10 checks expect one v_sub_f32_e32 per element
; (v0 - v2 and v1 - v3); the GFX11 checks expect both subtractions to be
; emitted as a single v_dual_sub_f32 pair, which is why GFX10 and GFX11 use
; separate prefixes here instead of the shared GFX10PLUS one.
57define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
58; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
59; GCN:       ; %bb.0:
60; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
62; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
63; GCN-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
66; GFX10:       ; %bb.0:
67; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
69; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
70; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
71; GFX10-NEXT:    s_setpc_b64 s[30:31]
72;
73; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
74; GFX11:       ; %bb.0:
75; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
77; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
78; GFX11-NEXT:    s_setpc_b64 s[30:31]
79  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
80  ret <2 x float> %val
81}
82
; <2 x float> constrained fsub with round.tonearest and fpexcept.ignore.
; The GCN and GFX10 checks expect two v_sub_f32_e32 instructions
; (v0 - v2 and v1 - v3); the GFX11 checks expect one v_dual_sub_f32 pair
; covering both elements.
83define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
84; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
85; GCN:       ; %bb.0:
86; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
88; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
89; GCN-NEXT:    s_setpc_b64 s[30:31]
90;
91; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
92; GFX10:       ; %bb.0:
93; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
95; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
96; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
97; GFX10-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
100; GFX11:       ; %bb.0:
101; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
103; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
104; GFX11-NEXT:    s_setpc_b64 s[30:31]
105  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
106  ret <2 x float> %val
107}
108
; <2 x float> constrained fsub with round.tonearest and fpexcept.maytrap.
; The GCN and GFX10 checks expect two v_sub_f32_e32 instructions
; (v0 - v2 and v1 - v3); the GFX11 checks expect one v_dual_sub_f32 pair
; covering both elements.
109define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
110; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
111; GCN:       ; %bb.0:
112; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; GCN-NEXT:    v_sub_f32_e32 v0, v0, v2
114; GCN-NEXT:    v_sub_f32_e32 v1, v1, v3
115; GCN-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
118; GFX10:       ; %bb.0:
119; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
121; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
122; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v3
123; GFX10-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
126; GFX11:       ; %bb.0:
127; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
129; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
130; GFX11-NEXT:    s_setpc_b64 s[30:31]
131  %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
132  ret <2 x float> %val
133}
134
; <3 x float> constrained fsub with round.tonearest and fpexcept.strict.
; The GCN and GFX10 checks expect three v_sub_f32_e32 instructions
; (v0 - v3, v1 - v4, v2 - v5). The GFX11 checks expect the first two elements
; in one v_dual_sub_f32 pair and the odd third element as a plain
; v_sub_f32_e32.
135define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
136; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
137; GCN:       ; %bb.0:
138; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GCN-NEXT:    v_sub_f32_e32 v0, v0, v3
140; GCN-NEXT:    v_sub_f32_e32 v1, v1, v4
141; GCN-NEXT:    v_sub_f32_e32 v2, v2, v5
142; GCN-NEXT:    s_setpc_b64 s[30:31]
143;
144; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
145; GFX10:       ; %bb.0:
146; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
148; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v3
149; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v4
150; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v5
151; GFX10-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
154; GFX11:       ; %bb.0:
155; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
157; GFX11-NEXT:    v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
158; GFX11-NEXT:    v_sub_f32_e32 v2, v2, v5
159; GFX11-NEXT:    s_setpc_b64 s[30:31]
160  %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
161  ret <3 x float> %val
162}
163
; Scalar f32 constrained fsub (round.tonearest, fpexcept.strict) in an
; amdgpu_ps function whose operands are inreg arguments, which the checks show
; arriving in s2 and s3.
; The GCN checks expect s3 to be copied into v0 first and then subtracted from
; s2 with v_sub_f32_e32; the GFX10PLUS checks expect a single v_sub_f32_e64
; reading both s2 and s3 directly.
; Unlike the v_ tests, no s_waitcnt or s_setpc_b64 is expected; the body ends
; at the "return to shader part epilog" marker.
164define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
165; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
166; GCN:       ; %bb.0:
167; GCN-NEXT:    v_mov_b32_e32 v0, s3
168; GCN-NEXT:    v_sub_f32_e32 v0, s2, v0
169; GCN-NEXT:    ; return to shader part epilog
170;
171; GFX10PLUS-LABEL: s_constained_fsub_f32_fpexcept_strict:
172; GFX10PLUS:       ; %bb.0:
173; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, s2, s3
174; GFX10PLUS-NEXT:    ; return to shader part epilog
175  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
176  ret float %val
177}
178
; Strict constrained fsub whose left operand is llvm.fabs.f32(%x).
; The checks expect no separate fabs instruction: the absolute value appears
; as the |v0| operand of a single v_sub_f32_e64 on every check prefix.
179define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
180; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
181; GCN:       ; %bb.0:
182; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; GCN-NEXT:    v_sub_f32_e64 v0, |v0|, v1
184; GCN-NEXT:    s_setpc_b64 s[30:31]
185;
186; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
187; GFX10PLUS:       ; %bb.0:
188; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
189; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
190; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, |v0|, v1
191; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
192  %fabs.x = call float @llvm.fabs.f32(float %x)
193  %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
194  ret float %val
195}
196
; Strict constrained fsub whose right operand is llvm.fabs.f32(%y).
; The checks expect no separate fabs instruction: the absolute value appears
; as the |v1| operand of a single v_sub_f32_e64 on every check prefix.
197define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
198; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
199; GCN:       ; %bb.0:
200; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GCN-NEXT:    v_sub_f32_e64 v0, v0, |v1|
202; GCN-NEXT:    s_setpc_b64 s[30:31]
203;
204; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
205; GFX10PLUS:       ; %bb.0:
206; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
208; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, v0, |v1|
209; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
210  %fabs.y = call float @llvm.fabs.f32(float %y)
211  %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
212  ret float %val
213}
214
; Strict constrained fsub whose left operand is fneg(llvm.fabs.f32(%x)).
; The checks expect both the negate and the absolute value to appear together
; as the -|v0| operand of a single v_sub_f32_e64, with no separate
; instructions for either, on every check prefix.
215define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
216; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
217; GCN:       ; %bb.0:
218; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GCN-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
220; GCN-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
223; GFX10PLUS:       ; %bb.0:
224; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX10PLUS-NEXT:    s_waitcnt_vscnt null, 0x0
226; GFX10PLUS-NEXT:    v_sub_f32_e64 v0, -|v0|, v1
227; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
228  %fabs.x = call float @llvm.fabs.f32(float %x)
229  %neg.fabs.x = fneg float %fabs.x
230  %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
231  ret float %val
232}
233
; Declarations of the intrinsics called by the test functions in this file:
; llvm.fabs.f32 and the f32 / v2f32 / v3f32 overloads of
; llvm.experimental.constrained.fsub. All of them use attribute group #1.
234declare float @llvm.fabs.f32(float) #1
235declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
236declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
237declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1
238
; Attribute group #0 (strictfp) is attached to every test function definition;
; group #1 is attached to the intrinsic declarations.
239attributes #0 = { strictfp }
240attributes #1 = { inaccessiblememonly nounwind willreturn }
241