1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
4
; Scalar f32 case: a constrained fma with round.tonearest / fpexcept.strict is
; expected to be emitted as exactly one v_fma_f32 on both gfx900 and gfx1010.
define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
21
; <2 x float> case: the strict constrained fma is expected to be split into one
; v_fma_f32 per element (two in total), in element order.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v2, v4
; GCN-NEXT:    v_fma_f32 v1, v1, v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %fma
}
40
; <3 x float> case: the strict constrained fma is expected to be split into
; three v_fma_f32 instructions, one per element, with no extra instructions
; for the odd vector width.
define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v3, v6
; GCN-NEXT:    v_fma_f32 v1, v1, v4, v7
; GCN-NEXT:    v_fma_f32 v2, v2, v5, v8
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, v0, v3, v6
; GFX10-NEXT:    v_fma_f32 v1, v1, v4, v7
; GFX10-NEXT:    v_fma_f32 v2, v2, v5, v8
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x float> %fma
}
61
; <4 x float> case: the strict constrained fma is expected to be split into
; four v_fma_f32 instructions, one per element.
define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v4, v8
; GCN-NEXT:    v_fma_f32 v1, v1, v5, v9
; GCN-NEXT:    v_fma_f32 v2, v2, v6, v10
; GCN-NEXT:    v_fma_f32 v3, v3, v7, v11
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, v0, v4, v8
; GFX10-NEXT:    v_fma_f32 v1, v1, v5, v9
; GFX10-NEXT:    v_fma_f32 v2, v2, v6, v10
; GFX10-NEXT:    v_fma_f32 v3, v3, v7, v11
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x float> %fma
}
84
; Negated addend: the fneg feeding the third fma operand is expected to be
; absorbed into the v_fma_f32 itself (operand printed as -v2) rather than
; emitted as a separate instruction.
define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %z.neg = fneg float %z
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z.neg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
102
; Both multiplicands negated: the two fneg operations are expected to be
; absorbed into the v_fma_f32 operands (printed as -v0 and -v1); the expected
; output keeps both negations instead of cancelling them.
define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v1, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg float %x
  %y.neg = fneg float %y
  %fma = call float @llvm.experimental.constrained.fma.f32(float %x.neg, float %y.neg, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %fma
}
121
; Absolute value on both multiplicands: the two llvm.fabs calls are expected to
; be absorbed into the v_fma_f32 operands (printed as |v0| and |v1|) rather
; than emitted as separate instructions.
define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  ; These values hold fabs results, so they are named abs.* (not neg.*).
  %abs.x = call float @llvm.fabs.f32(float %x)
  %abs.y = call float @llvm.fabs.f32(float %y)
  %val = call float @llvm.experimental.constrained.fma.f32(float %abs.x, float %abs.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret float %val
}
140
; <2 x float> with both multiplicands negated: expected to be split into one
; v_fma_f32 per element, each with the negations absorbed into its first two
; source operands.
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GCN-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_fma_f32 v0, -v0, -v2, v4
; GFX10-NEXT:    v_fma_f32 v1, -v1, -v3, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.neg = fneg <2 x float> %x
  %y.neg = fneg <2 x float> %y
  %fma = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x.neg, <2 x float> %y.neg, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x float> %fma
}
161
; Intrinsics called by the functions above: the constrained fma at each tested
; width (scalar, v2, v3, v4), followed by fabs. All carry attribute group #1.
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata) #1
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) #1
declare float @llvm.fabs.f32(float) #1

; #0 is the attribute group on the function definitions above.
attributes #0 = { strictfp }
; #1 is the attribute group on the intrinsic declarations above.
attributes #1 = { inaccessiblememonly nounwind willreturn }
170