; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

declare double @__sqrt_finite(double)
declare float @__sqrtf_finite(float)
declare x86_fp80 @__sqrtl_finite(x86_fp80)
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)


define double @finite_f64_no_estimate(double %d) #0 {
; SSE-LABEL: finite_f64_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}

; No estimates for doubles.

define double @finite_f64_estimate(double %d) #1 {
; SSE-LABEL: finite_f64_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f64_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call double @__sqrt_finite(double %d) #2
  ret double %call
}

define float @finite_f32_no_estimate(float %f) #0 {
; SSE-LABEL: finite_f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}

define float @finite_f32_estimate(float %f) #1 {
; SSE-LABEL: finite_f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss %xmm2, %xmm3
; SSE-NEXT:    mulss %xmm1, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqss %xmm1, %xmm0
; SSE-NEXT:    andnps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: finite_f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %call = tail call float @__sqrtf_finite(float %f) #2
  ret float %call
}

define x86_fp80 @finite_f80_no_estimate(x86_fp80 %ld) #0 {
; CHECK-LABEL: finite_f80_no_estimate:
; CHECK:       # BB#0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}

; Don't die on the impossible.
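; There is no estimate sequence for x86_fp80, so requesting one via
; attribute #1 must simply fall back to the plain fsqrt lowering.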

define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
; CHECK-LABEL: finite_f80_estimate_but_no:
; CHECK:       # BB#0:
; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fsqrt
; CHECK-NEXT:    retq
  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
  ret x86_fp80 %call
}

define float @f32_no_estimate(float %x) #0 {
; SSE-LABEL: f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss %xmm0, %xmm1
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}

define float @f32_estimate(float %x) #1 {
; SSE-LABEL: f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm2
; SSE-NEXT:    mulss %xmm0, %xmm2
; SSE-NEXT:    addss {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulss {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulss %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulss %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call float @llvm.sqrt.f32(float %x)
  %div = fdiv fast float 1.0, %sqrt
  ret float %div
}

define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; SSE-LABEL: v4f32_no_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    divps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtps %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}

define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
; SSE-LABEL: v4f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    addps {{.*}}(%rip), %xmm2
; SSE-NEXT:    mulps {{.*}}(%rip), %xmm1
; SSE-NEXT:    mulps %xmm2, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: v4f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtps %xmm0, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <4 x float> %div
}

define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; SSE-LABEL: v8f32_no_estimate:
; SSE:       # BB#0:
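; With only 128-bit registers available, the v8f32 operation is legalized
; as two xmm halves below.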
; SSE-NEXT:    sqrtps %xmm1, %xmm2
; SSE-NEXT:    sqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    divps %xmm3, %xmm0
; SSE-NEXT:    divps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_no_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtps %ymm0, %ymm0
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; AVX-NEXT:    vdivps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}

define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
; SSE-LABEL: v8f32_estimate:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01]
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm2
; SSE-NEXT:    mulps %xmm0, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00]
; SSE-NEXT:    addps %xmm0, %xmm2
; SSE-NEXT:    mulps %xmm4, %xmm2
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    rsqrtps %xmm1, %xmm5
; SSE-NEXT:    movaps %xmm5, %xmm3
; SSE-NEXT:    mulps %xmm3, %xmm3
; SSE-NEXT:    mulps %xmm1, %xmm3
; SSE-NEXT:    addps %xmm0, %xmm3
; SSE-NEXT:    mulps %xmm4, %xmm3
; SSE-NEXT:    mulps %xmm5, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: v8f32_estimate:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtps %ymm0, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm1, %ymm2
; AVX-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
  ret <8 x float> %div
}


attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
attributes #2 = { nounwind readnone }
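
; Note on the estimate sequences above: the hardware rsqrtss/rsqrtps
; approximation is refined with one Newton-Raphson step,
;   est1 = est0 * -0.5 * (x * est0 * est0 - 3.0),
; which is where the -5.000000e-01 and -3.000000e+00 constants come from.
; In the "reciprocal-estimates" strings, a leading '!' disables an estimate
; kind: #0 forbids the f32 sqrt and division estimates, while #1 enables the
; scalar and vector sqrt estimates. x86 only provides an rsqrt approximation
; for f32, so the f64 and f80 cases use a real square root either way.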