; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

; Tests x86 lowering of vector fptrunc (double -> float), covering memory and
; register sources, widths <2/4/8 x double>, and zero-extending shuffles, on
; 32-bit and 64-bit targets with SSE4.1 and AVX.

; <2 x double> loaded from memory, truncated, and stored unaligned (align 1).
define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    extractps $1, %xmm0, 4(%eax)
; X32-SSE-NEXT:    movss %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vextractps $1, %xmm0, 4(%eax)
; X32-AVX-NEXT:    vmovss %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

; <4 x double> from memory: SSE needs two cvtpd2ps + unpcklpd; AVX folds the
; 256-bit load into a single vcvtpd2psy.
define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

; <8 x double> from memory: four cvtpd2ps + two unpcklpd on SSE; two
; vcvtpd2psy combined with vinsertf128 into one ymm store on AVX
; (vzeroupper emitted before returning).
define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vmovupd %ymm0, (%eax)
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX-NEXT:    vmovupd %ymm0, (%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

; Truncation from memory whose upper two result lanes are shuffled in from
; zeroinitializer ({cvt0, cvt1, 0, 0}); cvtpd2ps already zeroes the upper
; half of the destination, so no extra shuffle is expected.
define <4 x float> @fptrunc_frommem2_zext(<2 x double> * %ld) {
; X32-SSE-LABEL: fptrunc_frommem2_zext:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2_zext:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, <2 x double> * %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; Same zero-extending shuffle pattern as above, but the source is already
; in a register.
define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X32-SSE-LABEL: fptrunc_fromreg2_zext:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromreg2_zext:
; X32-AVX:       # BB#0:
; X32-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # BB#0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # BB#0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; FIXME: For exact truncations we should be able to fold this.
; Truncation of a compile-time-constant <4 x double>; currently still emits
; runtime cvtpd2ps conversions instead of a folded constant.
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # BB#0: # %entry
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # BB#0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy {{\.LCPI.*}}, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # BB#0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # BB#0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x double> undef, double 1.0, i32 0
  %1 = insertelement <4 x double> %0, double -2.0, i32 1
  %2 = insertelement <4 x double> %1, double +4.0, i32 2
  %3 = insertelement <4 x double> %2, double -0.0, i32 3
  %4 = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}