; Checks PowerPC code generation for division by a square root.
; With -enable-unsafe-fp-math (default check prefix) the reciprocal square
; root estimate instructions and their refinement arithmetic are expected;
; without it (SAFE prefix) the exact fsqrt/fdiv instructions are expected.
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

declare double @llvm.sqrt.f64(double)
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

; a / sqrt(b), double precision.
define double @foo(double %a, double %b) nounwind {
  %x = call double @llvm.sqrt.f64(double %b)
  %r = fdiv double %a, %x
  ret double %r

; CHECK-LABEL: @foo
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr

; CHECK-SAFE-LABEL: @foo
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; Same pattern as @foo, but carrying attribute group #0. The estimate must
; still be emitted, with no fmadd on either side of the fmul.
define double @no_estimate_refinement_f64(double %a, double %b) #0 {
  %x = call double @llvm.sqrt.f64(double %b)
  %r = fdiv double %a, %x
  ret double %r

; CHECK-LABEL: @no_estimate_refinement_f64
; CHECK: frsqrte
; CHECK-NOT: fmadd
; CHECK: fmul
; CHECK-NOT: fmadd
; CHECK: blr
}


; The square root is taken in single precision, extended, and then used as
; the divisor of a double-precision division.
define double @foof(double %a, float %b) nounwind {
  %x = call float @llvm.sqrt.f32(float %b)
  %y = fpext float %x to double
  %r = fdiv double %a, %y
  ret double %r

; CHECK-LABEL: @foof
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmul
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @foof
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; The square root is taken in double precision, truncated, and then used as
; the divisor of a single-precision division.
; The anchors below name @food explicitly; anchoring on "@foo" would only
; match this function by substring.
define float @food(float %a, double %b) nounwind {
  %x = call double @llvm.sqrt.f64(double %b)
  %y = fptrunc double %x to float
  %r = fdiv float %a, %y
  ret float %r

; CHECK-LABEL: @food
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: frsp
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @food
; CHECK-SAFE: fsqrt
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; a / sqrt(b), single precision.
define float @goo(float %a, float %b) nounwind {
  %x = call float @llvm.sqrt.f32(float %b)
  %r = fdiv float %a, %x
  ret float %r

; CHECK-LABEL: @goo
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @goo
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}


; Same pattern as @goo, but carrying attribute group #0. The estimate must
; still be emitted, with no fmadds on either side of the fmuls.
define float @no_estimate_refinement_f32(float %a, float %b) #0 {
  %x = call float @llvm.sqrt.f32(float %b)
  %r = fdiv float %a, %x
  ret float %r

; CHECK-LABEL: @no_estimate_refinement_f32
; CHECK: frsqrtes
; CHECK-NOT: fmadds
; CHECK: fmuls
; CHECK-NOT: fmadds
; CHECK: blr
}

; Recognize that this is rsqrt(a) * rcp(b) * c,
; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
; c / (sqrt(a) * b), single precision. The unsafe-math run expects both the
; frsqrtes and the fres estimate to appear.
define float @rsqrt_fmul(float %a, float %b, float %c) {
  %x = call float @llvm.sqrt.f32(float %a)
  %y = fmul float %x, %b
  %z = fdiv float %c, %y
  ret float %z

; CHECK-LABEL: @rsqrt_fmul
; CHECK-DAG: frsqrtes
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmuls
; CHECK-DAG: fnmsubs
; CHECK-DAG: fmadds
; CHECK-DAG: fmadds
; CHECK: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @rsqrt_fmul
; CHECK-SAFE: fsqrts
; CHECK-SAFE: fmuls
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; a / sqrt(b) on <4 x float>. The safe run must not use the vector estimate.
define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
  %r = fdiv <4 x float> %a, %x
  ret <4 x float> %r

; CHECK-LABEL: @hoo
; CHECK: vrsqrtefp

; CHECK-SAFE-LABEL: @hoo
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}

; a / b, double precision (no square root involved).
define double @foo2(double %a, double %b) nounwind {
  %r = fdiv double %a, %b
  ret double %r

; CHECK-LABEL: @foo2
; CHECK-DAG: fre
; CHECK-DAG: fnmsub
; CHECK: fmadd
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @foo2
; CHECK-SAFE: fdiv
; CHECK-SAFE: blr
}

; a / b, single precision.
define float @goo2(float %a, float %b) nounwind {
  %r = fdiv float %a, %b
  ret float %r

; CHECK-LABEL: @goo2
; CHECK-DAG: fres
; CHECK-DAG: fnmsubs
; CHECK: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: blr

; CHECK-SAFE-LABEL: @goo2
; CHECK-SAFE: fdivs
; CHECK-SAFE: blr
}

; a / b on <4 x float>. The safe run must not use the vector estimate.
define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
  %r = fdiv <4 x float> %a, %b
  ret <4 x float> %r

; CHECK-LABEL: @hoo2
; CHECK: vrefp

; CHECK-SAFE-LABEL: @hoo2
; CHECK-SAFE-NOT: vrefp
; CHECK-SAFE: blr
}

; sqrt(a) on its own, double precision. The unsafe-math run expects an fcmpu
; ahead of the estimate sequence.
define double @foo3(double %a) nounwind {
  %r = call double @llvm.sqrt.f64(double %a)
  ret double %r

; CHECK-LABEL: @foo3
; CHECK: fcmpu
; CHECK-DAG: frsqrte
; CHECK-DAG: fnmsub
; CHECK: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
; CHECK-NEXT: fmul
; CHECK: blr

; CHECK-SAFE-LABEL: @foo3
; CHECK-SAFE: fsqrt
; CHECK-SAFE: blr
}

; sqrt(a) on its own, single precision.
define float @goo3(float %a) nounwind {
  %r = call float @llvm.sqrt.f32(float %a)
  ret float %r

; CHECK-LABEL: @goo3
; CHECK: fcmpu
; CHECK-DAG: frsqrtes
; CHECK-DAG: fnmsubs
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
; CHECK-NEXT: fmuls
; CHECK: blr

; CHECK-SAFE-LABEL: @goo3
; CHECK-SAFE: fsqrts
; CHECK-SAFE: blr
}

; sqrt(a) on <4 x float>. The safe run must not use the vector estimate.
define <4 x float> @hoo3(<4 x float> %a) nounwind {
  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %r

; CHECK-LABEL: @hoo3
; CHECK: vrsqrtefp
; CHECK-DAG: vcmpeqfp

; CHECK-SAFE-LABEL: @hoo3
; CHECK-SAFE-NOT: vrsqrtefp
; CHECK-SAFE: blr
}

; Attribute group referenced by the no_estimate_refinement_* functions.
attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" }
