; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s

; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527

; Use an AVX target to show that the potential problem
; is not limited to 128-bit types/registers. Ie, widening
; up to 256-bits may also result in bogus libcalls.

; Use fsin as the representative test for various data types.

declare <1 x float> @llvm.sin.v1f32(<1 x float>)
declare <2 x float> @llvm.sin.v2f32(<2 x float>)
declare <3 x float> @llvm.sin.v3f32(<3 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
declare <5 x float> @llvm.sin.v5f32(<5 x float>)
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
declare <3 x double> @llvm.sin.v3f64(<3 x double>)

; Verify that all of the potential libcall candidates are handled.
; Some of these have custom lowering, so those cases won't have
; libcalls.

declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
declare <2 x float> @llvm.cos.v2f32(<2 x float>)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <2 x float> @llvm.log.v2f32(<2 x float>)
declare <2 x float> @llvm.log10.v2f32(<2 x float>)
declare <2 x float> @llvm.log2.v2f32(<2 x float>)
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)

define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: sin_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sin_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sin_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: sin_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: sin_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: sin_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: sin_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: ceil_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $10, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: cos_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq cosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq cosf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq expf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq expf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: floor_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $9, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log10_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq log2f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq log2f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: nearbyint__v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $12, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: rint_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $4, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: round_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: trunc_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)
  ret <2 x float> %r
}