1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
5
; Scalar f32 case: a constrained fma with round.tonearest rounding and
; fpexcept.strict exception behavior, called from a function carrying
; attribute group #0. The assertions below expect the call to become a
; single v_fma_f32 on v0, v1, v2 under both check prefixes; the GFX10
; expectations additionally contain an s_waitcnt_vscnt before it.
6define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
7; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_fma_f32 v0, v0, v1, v2
11; GCN-NEXT:    s_setpc_b64 s[30:31]
12;
13; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
14; GFX10:       ; %bb.0:
15; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
17; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
18; GFX10-NEXT:    s_setpc_b64 s[30:31]
19  %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
20  ret float %val
21}
22
; <2 x float> case of the strict constrained fma. The assertions below
; expect the vector operation to be emitted as two scalar v_fma_f32
; instructions, one per element: (v0, v2, v4) and (v1, v3, v5).
23define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
24; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
25; GCN:       ; %bb.0:
26; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GCN-NEXT:    v_fma_f32 v0, v0, v2, v4
28; GCN-NEXT:    v_fma_f32 v1, v1, v3, v5
29; GCN-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
32; GFX10:       ; %bb.0:
33; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
35; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
36; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
37; GFX10-NEXT:    s_setpc_b64 s[30:31]
38  %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
39  ret <2 x float> %val
40}
41
; <3 x float> case of the strict constrained fma (an odd element count).
; The assertions below expect three v_fma_f32 instructions, one per
; element: (v0, v3, v6), (v1, v4, v7) and (v2, v5, v8).
42define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
43; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
44; GCN:       ; %bb.0:
45; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GCN-NEXT:    v_fma_f32 v0, v0, v3, v6
47; GCN-NEXT:    v_fma_f32 v1, v1, v4, v7
48; GCN-NEXT:    v_fma_f32 v2, v2, v5, v8
49; GCN-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
52; GFX10:       ; %bb.0:
53; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
55; GFX10-NEXT:    v_fma_f32 v0, v0, v3, v6
56; GFX10-NEXT:    v_fma_f32 v1, v1, v4, v7
57; GFX10-NEXT:    v_fma_f32 v2, v2, v5, v8
58; GFX10-NEXT:    s_setpc_b64 s[30:31]
59  %val = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
60  ret <3 x float> %val
61}
62
; <4 x float> case of the strict constrained fma. The assertions below
; expect four v_fma_f32 instructions, one per element, with element i
; using registers (v[i], v[i+4], v[i+8]) and writing back to v[i].
63define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
64; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
65; GCN:       ; %bb.0:
66; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GCN-NEXT:    v_fma_f32 v0, v0, v4, v8
68; GCN-NEXT:    v_fma_f32 v1, v1, v5, v9
69; GCN-NEXT:    v_fma_f32 v2, v2, v6, v10
70; GCN-NEXT:    v_fma_f32 v3, v3, v7, v11
71; GCN-NEXT:    s_setpc_b64 s[30:31]
72;
73; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
74; GFX10:       ; %bb.0:
75; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
77; GFX10-NEXT:    v_fma_f32 v0, v0, v4, v8
78; GFX10-NEXT:    v_fma_f32 v1, v1, v5, v9
79; GFX10-NEXT:    v_fma_f32 v2, v2, v6, v10
80; GFX10-NEXT:    v_fma_f32 v3, v3, v7, v11
81; GFX10-NEXT:    s_setpc_b64 s[30:31]
82  %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
83  ret <4 x float> %val
84}
85
; Strict constrained fma whose addend is negated by a separate fneg.
; The assertions below expect no standalone negate instruction: the
; negation appears as a source modifier on the third operand (-v2) of
; the single v_fma_f32.
86define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
87; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
88; GCN:       ; %bb.0:
89; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GCN-NEXT:    v_fma_f32 v0, v0, v1, -v2
91; GCN-NEXT:    s_setpc_b64 s[30:31]
92;
93; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
94; GFX10:       ; %bb.0:
95; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
97; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
98; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Only the addend %z is negated; %x and %y are passed through unchanged.
99  %neg.z = fneg float %z
100  %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
101  ret float %val
102}
103
; Strict constrained fma with both multiplicands negated by separate
; fneg instructions. The assertions below expect both negations to be
; kept as source modifiers (-v0, -v1) on the single v_fma_f32, with the
; addend v2 unmodified.
104define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
105; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
106; GCN:       ; %bb.0:
107; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GCN-NEXT:    v_fma_f32 v0, -v0, -v1, v2
109; GCN-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
112; GFX10:       ; %bb.0:
113; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
115; GFX10-NEXT:    v_fma_f32 v0, -v0, -v1, v2
116; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Negate both multiplicands; the addend %z is used as-is.
117  %neg.x = fneg float %x
118  %neg.y = fneg float %y
119  %val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
120  ret float %val
121}
122
; Strict constrained fma with both multiplicands passed through
; llvm.fabs first. The assertions below expect both absolute values to
; appear as source modifiers (|v0|, |v1|) on the single v_fma_f32, with
; the addend v2 unmodified.
123define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
124; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
125; GCN:       ; %bb.0:
126; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GCN-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
128; GCN-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
131; GFX10:       ; %bb.0:
132; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
134; GFX10-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
135; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Take the absolute value of both multiplicands; the addend %z is used
; as-is. The values are named abs.* because they hold fabs results, not
; negations.
136  %abs.x = call float @llvm.fabs.f32(float %x)
137  %abs.y = call float @llvm.fabs.f32(float %y)
138  %val = call float @llvm.experimental.constrained.fma.f32(float %abs.x, float %abs.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
139  ret float %val
140}
141
; <2 x float> variant of the double-fneg test: both multiplicand vectors
; are negated before the strict constrained fma. The assertions below
; expect two v_fma_f32 instructions, one per element, each carrying the
; negations as source modifiers: (-v0, -v2, v4) and (-v1, -v3, v5).
142define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
143; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
144; GCN:       ; %bb.0:
145; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GCN-NEXT:    v_fma_f32 v0, -v0, -v2, v4
147; GCN-NEXT:    v_fma_f32 v1, -v1, -v3, v5
148; GCN-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
151; GFX10:       ; %bb.0:
152; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
154; GFX10-NEXT:    v_fma_f32 v0, -v0, -v2, v4
155; GFX10-NEXT:    v_fma_f32 v1, -v1, -v3, v5
156; GFX10-NEXT:    s_setpc_b64 s[30:31]
; Negate both multiplicand vectors; the addend vector %z is used as-is.
157  %neg.x = fneg <2 x float> %x
158  %neg.y = fneg <2 x float> %y
159  %val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %neg.x, <2 x float> %neg.y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
160  ret <2 x float> %val
161}
162
; Declarations of the intrinsics called by the test functions in this
; file: llvm.fabs for f32, and the constrained fma for f32, v2f32, v3f32
; and v4f32. The constrained fma takes the three operands followed by two
; metadata arguments (rounding mode and exception behavior at the call
; sites). All declarations use attribute group #1.
163declare float @llvm.fabs.f32(float) #1
164declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
165declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata) #1
166declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata) #1
167declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) #1
168
; Attribute groups referenced in this file:
;   #0 - applied to every test function definition (strictfp).
;   #1 - applied to the intrinsic declarations.
169attributes #0 = { strictfp }
170attributes #1 = { inaccessiblememonly nounwind willreturn }
171