; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.

; These tests check fdiv using unsafe_fp_math, coarse fp div, and IEEE754 fp div.

; Scalar f32 fdiv with no fast-math flags and no !fpmath metadata, using
; attribute group #0. The r600 run expects a RECIP_IEEE of KC0[2].W and a
; MUL_IEEE of KC0[2].Z with PS. The amdgcn run expects the v_div_scale /
; v_rcp / v_fma / v_mul / v_div_fmas / v_div_fixup instructions listed below.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_div_scale_f32
; SI-DAG: v_div_scale_f32

; SI-DAG: v_rcp_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_mul_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_div_fmas_f32
; SI: v_div_fixup_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv tagged with !fpmath !0 (float 2.5), using attribute group #0
; (fp32 denormals disabled). Only the amdgcn run has checks here; it expects
; v_cndmask_b32, v_mul_f32, v_rcp_f32, v_mul_f32, v_mul_f32 in that order.
; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
; SI: v_cndmask_b32
; SI: v_mul_f32
; SI: v_rcp_f32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Use correct fdiv
; Same IR as the 2.5 ulp scalar test, but with attribute group #2 (fp32
; denormals enabled). The amdgcn run still expects v_fma_f32, v_div_fmas_f32
; and v_div_fixup_f32 despite the !fpmath !0 metadata.
; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
; SI: v_fma_f32
; SI: v_div_fmas_f32
; SI: v_div_fixup_f32
define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv with the `fast` flag, using attribute group #2 (fp32
; denormals enabled). The amdgcn run expects a v_rcp_f32_e32 of a scalar
; register, a v_mul_f32_e32 by that reciprocal, and a buffer_store_dword of
; the product, with no other mention of the product register in between.
; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv with the `fast` flag, using attribute group #0. The r600 run
; expects RECIP_IEEE of KC0[2].W and MUL_IEEE of KC0[2].Z with PS. The amdgcn
; run expects v_rcp_f32_e32 followed by v_mul_f32_e32, with the product stored
; directly by buffer_store_dword.
; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Scalar f32 fdiv with only the `arcp` flag, using attribute group #0. The
; expected output is the same as for the `fast` scalar test: RECIP_IEEE plus
; MUL_IEEE on r600, and v_rcp_f32_e32 plus v_mul_f32_e32 feeding
; buffer_store_dword on amdgcn.
; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv arcp float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with no fast-math flags, using attribute group #0. The r600
; run expects two RECIP_IEEE (KC0[3].Z, KC0[3].Y) and two MUL_IEEE (KC0[3].X,
; KC0[2].W, each with PS). The amdgcn run expects four v_div_scale_f32.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_div_scale_f32
; SI: v_div_scale_f32
; SI: v_div_scale_f32
; SI: v_div_scale_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with the `arcp` flag and !fpmath !0 (float 2.5), using
; attribute group #0. Only the amdgcn run has checks here; it expects two
; v_cmp_gt_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
; SI: v_cmp_gt_f32
; SI: v_cmp_gt_f32
define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with the `fast` flag, using attribute group #0. The r600
; run expects two RECIP_IEEE and two MUL_IEEE on the same KC0 operands as the
; plain v2f32 test. The amdgcn run expects two v_rcp_f32.
; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv fast <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with only the `arcp` flag, using attribute group #0. The
; expected output is the same as for the `fast` v2f32 test: two RECIP_IEEE and
; two MUL_IEEE on r600, two v_rcp_f32 on amdgcn.
; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with no fast-math flags, using attribute group #0. Unlike
; the scalar and v2f32 tests, both operands are loaded from memory: %a from
; %in and %b from the next <4 x float> element after %in. The r600 run expects
; four RECIP_IEEE and four MUL_IEEE on T registers. The amdgcn run expects
; four v_div_fixup_f32.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with the `fast` flag, using attribute group #0. Operands
; are loaded from %in and from the next <4 x float> element after %in. The
; r600 run expects four RECIP_IEEE and four MUL_IEEE on T registers. The
; amdgcn run expects four v_rcp_f32.
; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv fast <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with only the `arcp` flag, using attribute group #0.
; Operands are loaded from %in and from the next <4 x float> element after
; %in. The expected output is the same as for the `fast` v4f32 test: four
; RECIP_IEEE and four MUL_IEEE on r600, four v_rcp_f32 on amdgcn.
; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
entry:
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv arcp <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; Attribute groups and metadata referenced by the test functions in this file.
; #0 sets "enable-unsafe-fp-math" to false and disables fp32 denormals.
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
; #1 sets "enable-unsafe-fp-math" to true and disables fp32 denormals.
; NOTE(review): no function visible in this file references #1 - confirm
; whether it is still needed.
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
; #2 sets "enable-unsafe-fp-math" to false and enables fp32 denormals.
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" }

; Operand of the !fpmath annotations used by the 2.5 ulp tests.
!0 = !{float 2.500000e+00}
