1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4
; Finite-math sqrt libcall variants (emitted for -ffast-math style code) and
; the llvm.sqrt intrinsics exercised by the tests below.
declare double @__sqrt_finite(double)
declare float @__sqrtf_finite(float)
declare x86_fp80 @__sqrtl_finite(x86_fp80)
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
11
12
; Attribute #0 explicitly disables all reciprocal estimates, so the finite
; sqrt libcall must lower to a single real (v)sqrtsd instruction.
define double @finite_f64_no_estimate(double %d) #0 {
; SSE-LABEL: finite_f64_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}
26
27; No estimates for doubles.
28
; Attribute #1 enables sqrt estimates, but x86 has no double-precision
; estimate instruction, so this still lowers to a real (v)sqrtsd.
define double @finite_f64_estimate(double %d) #1 {
; SSE-LABEL: finite_f64_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}
42
; Estimates disabled (#0): the f32 finite sqrt libcall lowers to one (v)sqrtss.
define float @finite_f32_no_estimate(float %f) #0 {
; SSE-LABEL: finite_f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}
56
; Estimates enabled (#1): sqrt(x) is computed as x * rsqrt-estimate(x) refined
; with one Newton-Raphson step (the mulss/addss sequence with constants from
; the constant pool). The trailing cmpeqss-with-zero / andnps pair masks the
; result to +0.0 when the input is 0.0, since the estimate path would
; otherwise produce 0 * inf = NaN for a zero input.
define float @finite_f32_estimate(float %f) #1 {
; SSE-LABEL: finite_f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss %xmm2, %xmm3
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqss %xmm1, %xmm0
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}
88
; x86_fp80 sqrt goes through the x87 stack: load with fldt, then fsqrt.
; Same CHECK prefix for SSE and AVX since neither has an 80-bit path.
define x86_fp80 @finite_f80_no_estimate(x86_fp80 %ld) #0 {
; CHECK-LABEL: finite_f80_no_estimate:
; CHECK:       # BB#0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}
98
99; Don't die on the impossible.
100
; Even with estimates requested (#1), there is no estimate instruction for
; x86_fp80; verify codegen falls back to x87 fsqrt instead of crashing.
define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
; CHECK-LABEL: finite_f80_estimate_but_no:
; CHECK:       # BB#0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}
110
; 1.0 / sqrt(x) with a fast fdiv but estimates disabled (#0): expect a real
; (v)sqrtss followed by a real (v)divss, no rsqrt approximation.
define float @f32_no_estimate(float %x) #0 {
; SSE-LABEL: f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss %xmm0, %xmm1
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}
129
; 1.0 / sqrt(x) with estimates enabled (#1): lowered directly to rsqrtss plus
; one Newton-Raphson refinement step — note no divss/sqrtss appears. No
; zero-input fixup is needed here since the caller asked for 1/sqrt, where
; the estimate's inf-at-zero is the mathematically expected result.
define float @f32_estimate(float %x) #1 {
; SSE-LABEL: f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm2
; SSE-NEXT:    mulss %xmm0, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulss %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}
156
; Vector 1.0 / sqrt(x) with estimates disabled (#0): real (v)sqrtps followed
; by a real (v)divps against a splatted 1.0 constant.
define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; SSE-LABEL: v4f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    divps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtps %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}
175
; Vector 1.0 / sqrt(x) with estimates enabled (#1): (v)rsqrtps plus one
; Newton-Raphson refinement step; no divps or sqrtps is emitted.
define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
; SSE-LABEL: v4f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    addps {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulps {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtps %xmm0, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}
202
; 256-bit 1.0 / sqrt(x), estimates disabled (#0). SSE splits the v8f32 into
; two 128-bit halves (two sqrtps + two divps, sharing one 1.0 splat); AVX
; does it in a single ymm sqrt + div.
define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; SSE-LABEL: v8f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtps %xmm1, %xmm2
; SSE-NEXT:    sqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    divps %xmm3, %xmm0
; SSE-NEXT:    divps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}
224
; 256-bit 1.0 / sqrt(x), estimates enabled (#1). SSE runs the rsqrtps +
; Newton-Raphson sequence once per 128-bit half, hoisting the -0.5 and -3.0
; refinement constants into registers so they are loaded only once; AVX does
; the whole refinement in single ymm operations.
define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
; SSE-LABEL: v8f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
; SSE-NEXT:    addps %xmm0, %xmm2
; SSE-NEXT:    mulps %xmm4, %xmm2
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    rsqrtps %xmm1, %xmm5
; SSE-NEXT:    movaps %xmm5, %xmm3
; SSE-NEXT:    mulps %xmm3, %xmm3
; SSE-NEXT:    mulps %xmm1, %xmm3
; SSE-NEXT:    addps %xmm0, %xmm3
; SSE-NEXT:    mulps %xmm4, %xmm3
; SSE-NEXT:    mulps %xmm5, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtps %ymm0, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm1, %ymm2
; AVX-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}
261
262
; #0: unsafe FP math allowed, but every relevant reciprocal estimate is
;     explicitly disabled (leading '!' negates each entry).
; #1: unsafe FP math with scalar and vector sqrt estimates enabled.
; #2: marker for the tail calls to the sqrt libcalls (no side effects).
attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
attributes #2 = { nounwind readnone }
266
267