; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s

; Fast-math square root and reciprocal square root on AArch64.
;
; The file is compiled twice:
;   * with the use-reciprocal-square-root feature disabled, every function is
;     expected to start with a plain fsqrt (patterns under the alternate
;     prefix named on the first command line);
;   * with the feature enabled, every function is expected to start with the
;     frsqrte / fmul / frsqrts estimate-and-refine sequence (patterns under
;     the default prefix).
;
; In the second configuration the sqrt(x) tests additionally expect a compare
; of the input against zero (fcmp / fcmeq), while the 1/sqrt(x) tests expect
; that no such compare is emitted.
; NOTE(review): the compare presumably guards the x == 0 case of
; sqrt(x) = x * rsqrt(x) -- confirm against the lowering code.

declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0

; ---------------------------------------------------------------------------
; sqrt(x): estimate sequence followed by a compare of the input with zero.
; ---------------------------------------------------------------------------

define float @fsqrt(float %a) #0 {
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1

; FAULT-LABEL: fsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: fsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
; CHECK: fcmp s0, #0
}

define <2 x float> @f2sqrt(<2 x float> %a) #0 {
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  ret <2 x float> %1

; FAULT-LABEL: f2sqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f2sqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
; CHECK: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}

define <4 x float> @f4sqrt(<4 x float> %a) #0 {
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %1

; FAULT-LABEL: f4sqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f4sqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
; CHECK: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}

; <8 x float> is expected to produce two fsqrt instructions in the fallback
; configuration, and the zero compare may name either v0 or v1.
; NOTE(review): presumably the value is split into two 128-bit halves passed
; in v0 and v1 -- confirm.
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  ret <8 x float> %1

; FAULT-LABEL: f8sqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f8sqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
; CHECK: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
}

define double @dsqrt(double %a) #0 {
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1

; FAULT-LABEL: dsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: dsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
; CHECK: fcmp d0, #0
}

define <2 x double> @d2sqrt(<2 x double> %a) #0 {
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ret <2 x double> %1

; FAULT-LABEL: d2sqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: d2sqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
; CHECK: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}

; <4 x double> mirrors the <8 x float> case: two fsqrt instructions in the
; fallback configuration, zero compare against either v0 or v1.
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %1

; FAULT-LABEL: d4sqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt

; CHECK-LABEL: d4sqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
; CHECK: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
}

; ---------------------------------------------------------------------------
; 1.0 / sqrt(x): same estimate sequence, but no compare of the input with
; zero may follow it.
; ---------------------------------------------------------------------------

define float @frsqrt(float %a) #0 {
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  %2 = fdiv fast float 1.000000e+00, %1
  ret float %2

; FAULT-LABEL: frsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: frsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:s[0-7]]]
; CHECK-NEXT: fmul [[RB:s[0-7]]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{s[0-7](, s[0-7])?}}, [[RB]]
; CHECK-NOT: fcmp {{s[0-7]}}, #0
}

define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
  ret <2 x float> %2

; FAULT-LABEL: f2rsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f2rsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2s]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.2s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.2s(, v[0-7]\.2s)?}}, [[RB]]
; CHECK-NOT: fcmeq {{v[0-7]\.2s, v0\.2s}}, #0
}

define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <4 x float> %2

; FAULT-LABEL: f4rsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f4rsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
; CHECK-NOT: fcmeq {{v[0-7]\.4s, v0\.4s}}, #0
}

; The negative pattern below accepts v0 or v1 as the compared register, the
; same register set the positive pattern in f8sqrt uses, so a stray zero
; compare against either source register is rejected.
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <8 x float> %2

; FAULT-LABEL: f8rsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt

; CHECK-LABEL: f8rsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.4s]]
; CHECK: fmul [[RB:v[0-7]\.4s]], [[RA]], [[RA]]
; CHECK: frsqrts {{v[0-7]\.4s(, v[0-7]\.4s)?}}, [[RB]]
; CHECK-NOT: fcmeq {{v[0-7]\.4s, v[0-1]\.4s}}, #0
}

define double @drsqrt(double %a) #0 {
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  %2 = fdiv fast double 1.000000e+00, %1
  ret double %2

; FAULT-LABEL: drsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: drsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:d[0-7]]]
; CHECK-NEXT: fmul [[RB:d[0-7]]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{d[0-7](, d[0-7])?}}, [[RB]]
; CHECK-NOT: fcmp d0, #0
}

define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
  ret <2 x double> %2

; FAULT-LABEL: d2rsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt

; CHECK-LABEL: d2rsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK-NEXT: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK-NEXT: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
; CHECK-NOT: fcmeq {{v[0-7]\.2d, v0\.2d}}, #0
}

; As in f8rsqrt, the negative pattern accepts v0 or v1 as the compared
; register, the same register set the positive pattern in d4sqrt uses.
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
  ret <4 x double> %2

; FAULT-LABEL: d4rsqrt:
; FAULT-NEXT: BB#0
; FAULT-NEXT: fsqrt
; FAULT-NEXT: fsqrt

; CHECK-LABEL: d4rsqrt:
; CHECK-NEXT: BB#0
; CHECK-NEXT: frsqrte [[RA:v[0-7]\.2d]]
; CHECK: fmul [[RB:v[0-7]\.2d]], [[RA]], [[RA]]
; CHECK: frsqrts {{v[0-7]\.2d(, v[0-7]\.2d)?}}, [[RB]]
; CHECK-NOT: fcmeq {{v[0-7]\.2d, v[0-1]\.2d}}, #0
}

attributes #0 = { nounwind "unsafe-fp-math"="true" }