; Checks the x86-64 code that llc emits for vector unsigned-integer to float
; conversions (uitofp) when run with -enable-unsafe-fp-math.
;
; Each llc invocation below selects one feature level and hands FileCheck its
; own set of prefixes:
;   default features -> CHECK, CST, SSE, SSE2
;   +sse4.1          -> CHECK, CST, SSE, SSE41
;   +avx             -> CHECK, CST, AVX
;   +avx2            -> CHECK, AVX2
;   +avx512f         -> CHECK, AVX512F
;   +avx512vl        -> CHECK, AVX512VL
;
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=CST --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+sse4.1 \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=CST --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=CST --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512vl \
; RUN:   | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL

; Constant-pool entries expected ahead of the 4-element test. Each group
; captures the pool label in a FileCheck variable and then pins the data that
; follows it.
;
; Integer mask 0xffff in all four lanes (default-feature run only).
; NOTE(review): MASKCSTADDR is captured here, but the SSE2 body checks below
; match the mask load through a generic {{.*#+}} pattern and never reference
; the variable.
; SSE2: [[MASKCSTADDR:.LCPI[0-9_]+]]:
; SSE2-NEXT: .long 65535 # 0xffff
; SSE2-NEXT: .long 65535 # 0xffff
; SSE2-NEXT: .long 65535 # 0xffff
; SSE2-NEXT: .long 65535 # 0xffff

; float 65536 in all four lanes; shared by the three runs that enable the CST
; prefix (default, +sse4.1, +avx).
; CST: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
; CST-NEXT: .long 1199570944 # float 65536
; CST-NEXT: .long 1199570944 # float 65536
; CST-NEXT: .long 1199570944 # float 65536
; CST-NEXT: .long 1199570944 # float 65536

; With +avx2 only a single float 65536 is expected; the function checks below
; load it with vbroadcastss.
; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
; AVX2-NEXT: .long 1199570944 # float 65536

; uitofp <4 x i32> -> <4 x float>.
;
; Expected shape of the generated code, per prefix:
;  * SSE2, SSE41, AVX, AVX2 - the input is split into its low 16 bits (and
;    with the 65535 mask, or pblendw against a zeroed register) and its high
;    16 bits (psrld $16). Both parts go through cvtdq2ps, the high part is
;    multiplied by the float 65536 constant, and the two are added.
;  * AVX512F - a single vcvtudq2ps on the zmm register, bracketed by
;    "# kill" annotations.
;  * AVX512VL - a single vcvtudq2ps directly on xmm0.
define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
; SSE2-LABEL: test_uitofp_v4i32_to_v4f32:
; SSE2: # BB#0:
; SSE2-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT: andps %xmm0, %xmm1
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: mulps [[FPMASKCSTADDR]](%rip), %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_uitofp_v4i32_to_v4f32:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: mulps [[FPMASKCSTADDR]](%rip), %xmm0
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: vmulps [[FPMASKCSTADDR]](%rip), %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX2-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2
; AVX2-NEXT: vmulps %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
  %tmp = uitofp <4 x i32> %arg to <4 x float>
  ret <4 x float> %tmp
}

; Constant-pool entries expected ahead of the 8-element test. The "_v8"
; variables are distinct from the ones captured for the 4-element test above.
;
; +avx run - float 65536 followed by the 0xffff mask; four .long values of
; each are pinned here.
; AVX: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
; AVX-NEXT: .long 1199570944 # float 65536
; AVX-NEXT: .long 1199570944 # float 65536
; AVX-NEXT: .long 1199570944 # float 65536
; AVX-NEXT: .long 1199570944 # float 65536

; AVX: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
; AVX-NEXT: .long 65535 # 0xffff
; AVX-NEXT: .long 65535 # 0xffff
; AVX-NEXT: .long 65535 # 0xffff
; AVX-NEXT: .long 65535 # 0xffff

; +avx2 run - one scalar of each constant; the function checks below load
; them with vbroadcastss / vpbroadcastd.
; AVX2: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
; AVX2-NEXT: .long 1199570944 # float 65536

; AVX2: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
; AVX2-NEXT: .long 65535 # 0xffff

; uitofp <8 x i32> -> <8 x float>.
;
; Expected shape of the generated code, per prefix:
;  * SSE2, SSE41 - the value arrives in xmm0 and xmm1; each half gets the same
;    low-16 / high-16 split, convert, multiply-by-65536 and add sequence, with
;    the float constant kept in xmm3 and the mask (SSE2) or zero register
;    (SSE41) kept in xmm4 for both halves.
;  * AVX - the 16-bit right shift is done on each 128-bit half separately
;    (vextractf128 / vinsertf128); the convert, multiply, mask and add are
;    done on ymm registers with both constants read from memory.
;  * AVX2 - the shift and mask are done directly on ymm registers, with both
;    constants broadcast from memory.
;  * AVX512F - a single vcvtudq2ps on the zmm register, bracketed by
;    "# kill" annotations.
;  * AVX512VL - a single vcvtudq2ps directly on ymm0.
define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
; SSE2-LABEL: test_uitofp_v8i32_to_v8f32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrld $16, %xmm2
; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE2-NEXT: movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
; SSE2-NEXT: mulps %xmm3, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: addps %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrld $16, %xmm2
; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE2-NEXT: mulps %xmm3, %xmm2
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT: addps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_uitofp_v8i32_to_v8f32:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrld $16, %xmm2
; SSE41-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
; SSE41-NEXT: mulps %xmm3, %xmm2
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: addps %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrld $16, %xmm2
; SSE41-NEXT: cvtdq2ps %xmm2, %xmm2
; SSE41-NEXT: mulps %xmm3, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT: addps %xmm2, %xmm1
; SSE41-NEXT: retq
;
; AVX-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX: # BB#0:
; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX-NEXT: vmulps [[FPMASKCSTADDR_v8]](%rip), %ymm1, %ymm1
; AVX-NEXT: vandps [[MASKCSTADDR_v8]](%rip), %ymm0, %ymm0
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX2-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm1
; AVX2-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR_v8]](%rip), %ymm2
; AVX2-NEXT: vmulps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR_v8]](%rip), %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %tmp = uitofp <8 x i32> %arg to <8 x float>
  ret <8 x float> %tmp
}