; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL

; Codegen tests for the vector integer trunc instruction on x86-64.
; Every function below is compiled once per llc invocation listed above, and
; the expected assembly for each feature set is pinned by the check lines
; placed inside the function. Those check lines are autogenerated: regenerate
; them with the script named in the first line rather than editing by hand.

; Truncate <8 x i64> to <8 x i32>; the result is returned by value.
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i32:
; SSE: # BB#0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

; Truncate <8 x i64> to <8 x i16>; the result is returned by value.
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncate <8 x i64> to <8 x i8> and store the result through an undef
; pointer, so the store of the narrowed vector is part of the checked output.
define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: movq %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc8i64_8i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovqb %zmm0, (%rax)
; AVX512-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncate <8 x i32> to <8 x i16>; the result is returned by value.
define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i16:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i16:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncate <8 x i32> to <8 x i8> and store the result through an undef pointer.
define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movq %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movq %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: movq %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vmovq %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vmovq %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc8i32_8i8:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512F-NEXT: vmovq %xmm0, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc8i32_8i8:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncate <16 x i32> to <16 x i16> and store the result through an undef
; pointer.
define void @trunc16i32_16i16(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: pslld $16, %xmm2
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc16i32_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pslld $16, %xmm1
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: pslld $16, %xmm0
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: packssdw %xmm1, %xmm0
; SSSE3-NEXT: pslld $16, %xmm3
; SSSE3-NEXT: psrad $16, %xmm3
; SSSE3-NEXT: pslld $16, %xmm2
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: packssdw %xmm3, %xmm2
; SSSE3-NEXT: movdqu %xmm2, (%rax)
; SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i32_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pxor %xmm4, %xmm4
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
; SSE41-NEXT: packusdw %xmm3, %xmm2
; SSE41-NEXT: movdqu %xmm2, (%rax)
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovdw %zmm0, (%rax)
; AVX512-NEXT: retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, <16 x i16>* undef, align 4
  ret void
}

; Truncate <16 x i32> to <16 x i8> and store the result through an undef
; pointer.
define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: movdqu %xmm0, (%rax)
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc16i32_16i8:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovdb %zmm0, (%rax)
; AVX512-NEXT: retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

; Truncate <16 x i16> to <16 x i8> and store the result through an undef
; pointer.
;PR25684
define void @trunc16i16_16i8(<16 x i16> %a) {
; SSE2-LABEL: trunc16i16_16i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc16i16_16i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc16i16_16i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc16i16_16i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vmovdqu %xmm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc16i16_16i8:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vmovdqu %xmm0, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc16i16_16i8:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vmovdqu %xmm0, (%rax)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc16i16_16i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

; Truncate <32 x i16> to <32 x i8> and store the result through an undef
; pointer.
define void @trunc32i16_32i8(<32 x i16> %a) {
; SSE2-LABEL: trunc32i16_32i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pand %xmm4, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: packuswb %xmm3, %xmm2
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc32i16_32i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm4, %xmm1
; SSSE3-NEXT: pshufb %xmm4, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufb %xmm4, %xmm3
; SSSE3-NEXT: pshufb %xmm4, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT: movdqu %xmm2, (%rax)
; SSSE3-NEXT: movdqu %xmm0, (%rax)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc32i16_32i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm4, %xmm1
; SSE41-NEXT: pshufb %xmm4, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: pshufb %xmm4, %xmm3
; SSE41-NEXT: pshufb %xmm4, %xmm2
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE41-NEXT: movdqu %xmm2, (%rax)
; SSE41-NEXT: movdqu %xmm0, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc32i16_32i8:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc32i16_32i8:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc32i16_32i8:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%rax)
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc32i16_32i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, <32 x i8>* undef, align 4
  ret void
}

; Truncate two <4 x i64> inputs to <4 x i32> each, then concatenate the halves
; into one <8 x i32> with an identity shufflevector mask.
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32:
; SSE: # BB#0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT: movaps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i32:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i32:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i32:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

; Truncate two <4 x i64> inputs to <4 x i16> each, then concatenate the halves
; into one <8 x i16> with an identity shufflevector mask.
define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE41-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i64_8i16:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i64_8i16:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i16:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; Truncate two <2 x i64> inputs to <2 x i32> each, then concatenate the halves
; into one <4 x i32> with an identity shufflevector mask.
define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32:
; SSE: # BB#0: # %entry
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2x2i64_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: retq
;
; AVX512-LABEL: trunc2x2i64_4i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512-NEXT: retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

; Truncate <2 x i64> to <2 x i32> and bitcast the pair to a scalar i64, which
; is returned by value.
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2i64_i64:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2i64_i64:
; AVX512VL: # BB#0: # %entry
; AVX512VL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2i64_i64:
; AVX512BWVL: # BB#0: # %entry
; AVX512BWVL-NEXT: vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX512BWVL-NEXT: retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

; Takes two <4 x i32> inputs and returns <8 x i16>. NOTE(review): the IR body
; of this function is not visible in this excerpt; presumably it truncates
; both inputs and concatenates them like the trunc2x4i64 tests - confirm.
define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
948; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 949; SSE41-NEXT: retq 950; 951; AVX-LABEL: trunc2x4i32_8i16: 952; AVX: # BB#0: # %entry 953; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 954; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 955; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 956; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 957; AVX-NEXT: retq 958; 959; AVX512F-LABEL: trunc2x4i32_8i16: 960; AVX512F: # BB#0: # %entry 961; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 962; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1 963; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0 964; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 965; AVX512F-NEXT: retq 966; 967; AVX512VL-LABEL: trunc2x4i32_8i16: 968; AVX512VL: # BB#0: # %entry 969; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 970; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 971; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 972; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 973; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 974; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 975; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 976; AVX512VL-NEXT: retq 977; 978; AVX512BW-LABEL: trunc2x4i32_8i16: 979; AVX512BW: # BB#0: # %entry 980; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 981; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 982; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 983; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 984; AVX512BW-NEXT: retq 985; 986; AVX512BWVL-LABEL: trunc2x4i32_8i16: 987; AVX512BWVL: # BB#0: # %entry 988; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 989; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7] 990; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 991; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 992; AVX512BWVL-NEXT: 
vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 993; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 994; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 995; AVX512BWVL-NEXT: retq 996entry: 997 %0 = trunc <4 x i32> %a to <4 x i16> 998 %1 = trunc <4 x i32> %b to <4 x i16> 999 %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1000 ret <8 x i16> %2 1001} 1002 1003; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524 1004define i64 @trunc4i32_i64(<4 x i32> %inval) { 1005; SSE2-LABEL: trunc4i32_i64: 1006; SSE2: # BB#0: # %entry 1007; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1008; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 1009; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1010; SSE2-NEXT: movd %xmm0, %rax 1011; SSE2-NEXT: retq 1012; 1013; SSSE3-LABEL: trunc4i32_i64: 1014; SSSE3: # BB#0: # %entry 1015; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1016; SSSE3-NEXT: movd %xmm0, %rax 1017; SSSE3-NEXT: retq 1018; 1019; SSE41-LABEL: trunc4i32_i64: 1020; SSE41: # BB#0: # %entry 1021; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1022; SSE41-NEXT: movd %xmm0, %rax 1023; SSE41-NEXT: retq 1024; 1025; AVX-LABEL: trunc4i32_i64: 1026; AVX: # BB#0: # %entry 1027; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1028; AVX-NEXT: vmovq %xmm0, %rax 1029; AVX-NEXT: retq 1030; 1031; AVX512F-LABEL: trunc4i32_i64: 1032; AVX512F: # BB#0: # %entry 1033; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1034; AVX512F-NEXT: vmovq %xmm0, %rax 1035; AVX512F-NEXT: retq 1036; 1037; AVX512VL-LABEL: trunc4i32_i64: 1038; AVX512VL: # BB#0: # %entry 1039; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 1040; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1041; AVX512VL-NEXT: retq 1042; 1043; AVX512BW-LABEL: trunc4i32_i64: 1044; AVX512BW: # BB#0: # %entry 1045; 
AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1046; AVX512BW-NEXT: vmovq %xmm0, %rax 1047; AVX512BW-NEXT: retq 1048; 1049; AVX512BWVL-LABEL: trunc4i32_i64: 1050; AVX512BWVL: # BB#0: # %entry 1051; AVX512BWVL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 1052; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1053; AVX512BWVL-NEXT: retq 1054entry: 1055 %0 = trunc <4 x i32> %inval to <4 x i16> 1056 %1 = bitcast <4 x i16> %0 to i64 1057 ret i64 %1 1058} 1059 1060define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) { 1061; SSE2-LABEL: trunc2x8i16_16i8: 1062; SSE2: # BB#0: # %entry 1063; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 1064; SSE2-NEXT: pand %xmm2, %xmm1 1065; SSE2-NEXT: pand %xmm2, %xmm0 1066; SSE2-NEXT: packuswb %xmm1, %xmm0 1067; SSE2-NEXT: retq 1068; 1069; SSSE3-LABEL: trunc2x8i16_16i8: 1070; SSSE3: # BB#0: # %entry 1071; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1072; SSSE3-NEXT: pshufb %xmm2, %xmm1 1073; SSSE3-NEXT: pshufb %xmm2, %xmm0 1074; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1075; SSSE3-NEXT: retq 1076; 1077; SSE41-LABEL: trunc2x8i16_16i8: 1078; SSE41: # BB#0: # %entry 1079; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1080; SSE41-NEXT: pshufb %xmm2, %xmm1 1081; SSE41-NEXT: pshufb %xmm2, %xmm0 1082; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1083; SSE41-NEXT: retq 1084; 1085; AVX-LABEL: trunc2x8i16_16i8: 1086; AVX: # BB#0: # %entry 1087; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1088; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1089; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1090; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1091; AVX-NEXT: retq 1092; 1093; AVX512F-LABEL: trunc2x8i16_16i8: 1094; AVX512F: # BB#0: # %entry 1095; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1096; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1097; AVX512F-NEXT: vpshufb 
%xmm2, %xmm0, %xmm0 1098; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1099; AVX512F-NEXT: retq 1100; 1101; AVX512VL-LABEL: trunc2x8i16_16i8: 1102; AVX512VL: # BB#0: # %entry 1103; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1104; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1105; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1106; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1107; AVX512VL-NEXT: retq 1108; 1109; AVX512BW-LABEL: trunc2x8i16_16i8: 1110; AVX512BW: # BB#0: # %entry 1111; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1112; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1113; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1114; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1115; AVX512BW-NEXT: retq 1116; 1117; AVX512BWVL-LABEL: trunc2x8i16_16i8: 1118; AVX512BWVL: # BB#0: # %entry 1119; AVX512BWVL-NEXT: vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1120; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1121; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1122; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1123; AVX512BWVL-NEXT: retq 1124entry: 1125 %0 = trunc <8 x i16> %a to <8 x i8> 1126 %1 = trunc <8 x i16> %b to <8 x i8> 1127 %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1128 ret <16 x i8> %2 1129} 1130 1131; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524 1132define i64 @trunc8i16_i64(<8 x i16> %inval) { 1133; SSE2-LABEL: trunc8i16_i64: 1134; SSE2: # BB#0: # %entry 1135; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 1136; SSE2-NEXT: packuswb %xmm0, %xmm0 1137; SSE2-NEXT: movd %xmm0, %rax 1138; SSE2-NEXT: retq 1139; 1140; SSSE3-LABEL: trunc8i16_i64: 1141; SSSE3: # BB#0: # %entry 1142; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1143; SSSE3-NEXT: movd %xmm0, %rax 1144; SSSE3-NEXT: retq 
1145; 1146; SSE41-LABEL: trunc8i16_i64: 1147; SSE41: # BB#0: # %entry 1148; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1149; SSE41-NEXT: movd %xmm0, %rax 1150; SSE41-NEXT: retq 1151; 1152; AVX-LABEL: trunc8i16_i64: 1153; AVX: # BB#0: # %entry 1154; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1155; AVX-NEXT: vmovq %xmm0, %rax 1156; AVX-NEXT: retq 1157; 1158; AVX512F-LABEL: trunc8i16_i64: 1159; AVX512F: # BB#0: # %entry 1160; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1161; AVX512F-NEXT: vmovq %xmm0, %rax 1162; AVX512F-NEXT: retq 1163; 1164; AVX512VL-LABEL: trunc8i16_i64: 1165; AVX512VL: # BB#0: # %entry 1166; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1167; AVX512VL-NEXT: vmovq %xmm0, %rax 1168; AVX512VL-NEXT: retq 1169; 1170; AVX512BW-LABEL: trunc8i16_i64: 1171; AVX512BW: # BB#0: # %entry 1172; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1173; AVX512BW-NEXT: vmovq %xmm0, %rax 1174; AVX512BW-NEXT: retq 1175; 1176; AVX512BWVL-LABEL: trunc8i16_i64: 1177; AVX512BWVL: # BB#0: # %entry 1178; AVX512BWVL-NEXT: vpmovwb %xmm0, -{{[0-9]+}}(%rsp) 1179; AVX512BWVL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1180; AVX512BWVL-NEXT: retq 1181entry: 1182 %0 = trunc <8 x i16> %inval to <8 x i8> 1183 %1 = bitcast <8 x i8> %0 to i64 1184 ret i64 %1 1185} 1186 1187define <16 x i8> @trunc16i64_16i8_const() { 1188; SSE-LABEL: trunc16i64_16i8_const: 1189; SSE: # BB#0: # %entry 1190; SSE-NEXT: xorps %xmm0, %xmm0 1191; SSE-NEXT: retq 1192; 1193; AVX-LABEL: trunc16i64_16i8_const: 1194; AVX: # BB#0: # %entry 1195; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1196; AVX-NEXT: retq 1197; 1198; AVX512F-LABEL: trunc16i64_16i8_const: 1199; AVX512F: # BB#0: # %entry 1200; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 1201; AVX512F-NEXT: retq 1202; 1203; AVX512VL-LABEL: trunc16i64_16i8_const: 1204; AVX512VL: # BB#0: # %entry 1205; AVX512VL-NEXT: vpxor 
%xmm0, %xmm0, %xmm0 1206; AVX512VL-NEXT: retq 1207; 1208; AVX512BW-LABEL: trunc16i64_16i8_const: 1209; AVX512BW: # BB#0: # %entry 1210; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0 1211; AVX512BW-NEXT: retq 1212; 1213; AVX512BWVL-LABEL: trunc16i64_16i8_const: 1214; AVX512BWVL: # BB#0: # %entry 1215; AVX512BWVL-NEXT: vpxor %xmm0, %xmm0, %xmm0 1216; AVX512BWVL-NEXT: retq 1217 1218entry: 1219 %0 = trunc <16 x i64> zeroinitializer to <16 x i8> 1220 %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26> 1221 ret <16 x i8> %1 1222} 1223 1224