; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ

;
; add
;

define <4 x i32> @trunc_add_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; SSE-LABEL: trunc_add_v4i64_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    paddq %xmm3, %xmm1
; SSE-NEXT:    paddq %xmm2, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v4i64_v4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v4i64_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_v4i64_v4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = add <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}
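
; The add must stay at the wide <8 x i64> type; AVX512 then truncates with a
; single vpmovqw, while SSE/AVX assemble the <8 x i16> result from shuffles and packs.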
define <8 x i16> @trunc_add_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; SSE-LABEL: trunc_add_v8i64_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    paddq %xmm4, %xmm0
; SSE-NEXT:    paddq %xmm5, %xmm1
; SSE-NEXT:    paddq %xmm6, %xmm2
; SSE-NEXT:    paddq %xmm7, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v8i64_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0],xmm3[1,2,3],xmm4[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v8i64_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_v8i64_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_add_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; SSE-LABEL: trunc_add_v8i32_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    paddd %xmm3, %xmm1
; SSE-NEXT:    pslld $16, %xmm1
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    pslld $16, %xmm0
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v8i32_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v8i32_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_v8i32_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = add <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
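
; For <16 x i64> the second source vector no longer fits in registers, so the
; SSE adds take their operands from the stack; truncation masks each lane to 255
; and packs down in stages.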
define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_add_v16i64_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT:    paddq {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm8, %xmm7
; SSE-NEXT:    pand %xmm8, %xmm6
; SSE-NEXT:    packuswb %xmm7, %xmm6
; SSE-NEXT:    pand %xmm8, %xmm5
; SSE-NEXT:    pand %xmm8, %xmm4
; SSE-NEXT:    packuswb %xmm5, %xmm4
; SSE-NEXT:    packuswb %xmm6, %xmm4
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v16i64_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpaddq %xmm4, %xmm0, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm7, %xmm3, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vpaddq %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpackuswb %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpand %xmm7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpackuswb %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm7, %xmm4, %xmm3
; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm7, %xmm8, %xmm3
; AVX1-NEXT:    vpackuswb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v16i64_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddq %ymm5, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpaddq %ymm7, %ymm3, %ymm3
; AVX2-NEXT:    vpaddq %ymm6, %ymm2, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_add_v16i64_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpaddq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_add_v16i64_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_add_v16i64_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpaddq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = add <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}
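
; <16 x i32> -> <16 x i8>: mask each lane to 255 and packuswb (SSE/AVX1);
; AVX512 truncates the zmm sum directly with vpmovdb.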
define <16 x i8> @trunc_add_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_add_v16i32_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    paddd %xmm4, %xmm0
; SSE-NEXT:    paddd %xmm5, %xmm1
; SSE-NEXT:    paddd %xmm6, %xmm2
; SSE-NEXT:    paddd %xmm7, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v16i32_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm4, %xmm2
; AVX1-NEXT:    vpackuswb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v16i32_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_v16i32_v16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}
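
; <16 x i16> -> <16 x i8>: AVX512BW has a direct vpmovwb; AVX512F/AVX512DQ must
; widen to <16 x i32> first and truncate with vpmovdb.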
define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_add_v16i16_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    paddw %xmm2, %xmm0
; SSE-NEXT:    paddw %xmm3, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_v16i16_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_v16i16_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_add_v16i16_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_add_v16i16_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_add_v16i16_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = add <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; add to constant
;
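
; With a constant operand the add is performed after the truncation at the
; narrow type, i.e. trunc(x) + trunc(C), leaving a single paddd against the constant pool.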
define <4 x i32> @trunc_add_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v4i64_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v4i64_v4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v4i64_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_const_v4i64_v4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_add_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v8i64_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT:    paddw {{.*}}(%rip), %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v8i64_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v8i64_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_const_v8i64_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}
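
; The constant add again narrows to one paddw on the truncated value.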
define <8 x i16> @trunc_add_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v8i32_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $16, %xmm1
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    pslld $16, %xmm0
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    paddw {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v8i32_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v8i32_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_const_v8i32_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
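
; The wide adds vanish entirely; only the truncation sequence plus one paddb of
; the truncated constant remains.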
define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v16i64_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm8, %xmm7
; SSE-NEXT:    pand %xmm8, %xmm6
; SSE-NEXT:    packuswb %xmm7, %xmm6
; SSE-NEXT:    pand %xmm8, %xmm5
; SSE-NEXT:    pand %xmm8, %xmm4
; SSE-NEXT:    packuswb %xmm5, %xmm4
; SSE-NEXT:    packuswb %xmm6, %xmm4
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm4, %xmm0
; SSE-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v16i64_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT:    vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v16i64_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_add_const_v16i64_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_add_const_v16i64_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_add_const_v16i64_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = add <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}
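
; Same narrowing as above: mask-and-pack the source, then a single paddb.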
define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v16i32_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v16i32_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v16i32_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_add_const_v16i32_v16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = add <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_add_const_v16i16_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    paddb {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_add_const_v16i16_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_add_const_v16i16_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_add_const_v16i16_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_add_const_v16i16_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_add_const_v16i16_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = add <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; sub
;
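
; The sub tests mirror the add tests above: the subtraction is performed at the
; wide type, followed by the same truncation patterns.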
define <4 x i32> @trunc_sub_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; SSE-LABEL: trunc_sub_v4i64_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    psubq %xmm3, %xmm1
; SSE-NEXT:    psubq %xmm2, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v4i64_v4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v4i64_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_v4i64_v4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = sub <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}
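
; <8 x i64> -> <8 x i16>: identical shuffle/pack truncation to the add case,
; with psubq/vpsubq as the arithmetic op.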
define <8 x i16> @trunc_sub_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; SSE-LABEL: trunc_sub_v8i64_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    psubq %xmm4, %xmm0
; SSE-NEXT:    psubq %xmm5, %xmm1
; SSE-NEXT:    psubq %xmm6, %xmm2
; SSE-NEXT:    psubq %xmm7, %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v8i64_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsubq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0],xmm3[1,2,3],xmm4[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v8i64_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_v8i64_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = sub <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_sub_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; SSE-LABEL: trunc_sub_v8i32_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm3, %xmm1
; SSE-NEXT:    pslld $16, %xmm1
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    pslld $16, %xmm0
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v8i32_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v8i32_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_v8i32_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = sub <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
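
; As with the add, the <16 x i64> second operand is taken from the stack on SSE.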
define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_sub_v16i64_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT:    psubq {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm8, %xmm7
; SSE-NEXT:    pand %xmm8, %xmm6
; SSE-NEXT:    packuswb %xmm7, %xmm6
; SSE-NEXT:    pand %xmm8, %xmm5
; SSE-NEXT:    pand %xmm8, %xmm4
; SSE-NEXT:    packuswb %xmm5, %xmm4
; SSE-NEXT:    packuswb %xmm6, %xmm4
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v16i64_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsubq %xmm5, %xmm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsubq %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsubq %xmm6, %xmm2, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm6, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpsubq %xmm7, %xmm3, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm7, %xmm7
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vpsubq %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm7, %xmm6, %xmm6
; AVX1-NEXT:    vpackuswb %xmm3, %xmm6, %xmm3
; AVX1-NEXT:    vpand %xmm7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
; AVX1-NEXT:    vpackuswb %xmm2, %xmm5, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm7, %xmm4, %xmm3
; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm7, %xmm8, %xmm3
; AVX1-NEXT:    vpackuswb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v16i64_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq %ymm5, %ymm1, %ymm1
; AVX2-NEXT:    vpsubq %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm7, %ymm3, %ymm3
; AVX2-NEXT:    vpsubq %ymm6, %ymm2, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_sub_v16i64_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpsubq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_sub_v16i64_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpsubq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_sub_v16i64_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpsubq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpsubq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = sub <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}
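
; <16 x i32> -> <16 x i8>: vector psubd, then mask-and-pack (SSE/AVX) or vpmovdb (AVX512).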
define <16 x i8> @trunc_sub_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_sub_v16i32_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    psubd %xmm4, %xmm0
; SSE-NEXT:    psubd %xmm5, %xmm1
; SSE-NEXT:    psubd %xmm6, %xmm2
; SSE-NEXT:    psubd %xmm7, %xmm3
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v16i32_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm4, %xmm2
; AVX1-NEXT:    vpackuswb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v16i32_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_v16i32_v16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = sub <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}
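
; <16 x i16> -> <16 x i8>: AVX512BW uses vpmovwb; AVX512F/AVX512DQ go through <16 x i32>.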
define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_sub_v16i16_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    psubw %xmm2, %xmm0
; SSE-NEXT:    psubw %xmm3, %xmm1
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_v16i16_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_v16i16_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_sub_v16i16_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_sub_v16i16_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_sub_v16i16_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = sub <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; sub to constant
;
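
; Unlike the add-by-constant tests the subtraction is not narrowed: it still
; happens at <4 x i64>, with the <0,1> half of the constant materialized via
; movl/movd/pslldq and the rest loaded from the constant pool.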
define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v4i64_v4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movl $1, %eax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; SSE-NEXT:    psubq %xmm2, %xmm0
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    movl $1, %eax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpsubq %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_const_v4i64_v4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i64_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    movl $1, %eax
; SSE-NEXT:    movd %rax, %xmm4
; SSE-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
; SSE-NEXT:    psubq %xmm4, %xmm0
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    movl $1, %eax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_const_v8i64_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}
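
; <8 x i32>: psubd against constant-pool vectors at the wide type, then the
; usual shift/pack truncation.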
define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v8i32_v8i16:
; SSE:       # BB#0:
; SSE-NEXT:    psubd {{.*}}(%rip), %xmm0
; SSE-NEXT:    psubd {{.*}}(%rip), %xmm1
; SSE-NEXT:    pslld $16, %xmm1
; SSE-NEXT:    psrad $16, %xmm1
; SSE-NEXT:    pslld $16, %xmm0
; SSE-NEXT:    psrad $16, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_sub_const_v8i32_v8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    retq
  %1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}
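
; The <16 x i64> constant version still performs all eight 128-bit psubqs before truncating.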
define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_sub_const_v16i64_v16i8:
; SSE:       # BB#0:
; SSE-NEXT:    movl $1, %eax
; SSE-NEXT:    movd %rax, %xmm8
; SSE-NEXT:    pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7]
; SSE-NEXT:    psubq %xmm8, %xmm0
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm1
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm2
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm3
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm4
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm5
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm6
; SSE-NEXT:    psubq {{.*}}(%rip), %xmm7
; SSE-NEXT:    movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm8, %xmm7
; SSE-NEXT:    pand %xmm8, %xmm6
; SSE-NEXT:    packuswb %xmm7, %xmm6
; SSE-NEXT:    pand %xmm8, %xmm5
; SSE-NEXT:    pand %xmm8, %xmm4
; SSE-NEXT:    packuswb %xmm5, %xmm4
; SSE-NEXT:    packuswb %xmm6, %xmm4
; SSE-NEXT:    pand %xmm8, %xmm3
; SSE-NEXT:    pand %xmm8, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm8, %xmm1
; SSE-NEXT:    pand %xmm8, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm4, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    movl $1, %eax
; AVX1-NEXT:    vmovq %rax, %xmm4
; AVX1-NEXT:    vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpsubq %xmm4, %xmm0, %xmm8
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm5
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm6
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm7
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vpsubq {{.*}}(%rip), %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpand %xmm4, %xmm7, %xmm7
; AVX1-NEXT:    vpackuswb %xmm3, %xmm7, %xmm3
; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm4, %xmm6, %xmm6
; AVX1-NEXT:    vpackuswb %xmm2, %xmm6, %xmm2
; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm3
; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm8, %xmm3
; AVX1-NEXT:    vpackuswb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm3, %ymm3
; AVX2-NEXT:    vpsubq {{.*}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vpsubq {{.*}}(%rip), %zmm1, %zmm1
; AVX512F-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vpsubq {{.*}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: trunc_sub_const_v16i64_v16i8:
; AVX512DQ:       # BB#0:
; AVX512DQ-NEXT:    vpsubq {{.*}}(%rip), %zmm1, %zmm1
; AVX512DQ-NEXT:    vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    retq
  %1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}
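
; <16 x i32>: four wide psubds, then mask-and-pack; AVX512 is a single vpsubd plus vpmovdb.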
i32> %a0) nounwind { 1466; SSE-LABEL: trunc_sub_const_v16i32_v16i8: 1467; SSE: # BB#0: 1468; SSE-NEXT: psubd {{.*}}(%rip), %xmm0 1469; SSE-NEXT: psubd {{.*}}(%rip), %xmm1 1470; SSE-NEXT: psubd {{.*}}(%rip), %xmm2 1471; SSE-NEXT: psubd {{.*}}(%rip), %xmm3 1472; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 1473; SSE-NEXT: pand %xmm4, %xmm3 1474; SSE-NEXT: pand %xmm4, %xmm2 1475; SSE-NEXT: packuswb %xmm3, %xmm2 1476; SSE-NEXT: pand %xmm4, %xmm1 1477; SSE-NEXT: pand %xmm4, %xmm0 1478; SSE-NEXT: packuswb %xmm1, %xmm0 1479; SSE-NEXT: packuswb %xmm2, %xmm0 1480; SSE-NEXT: retq 1481; 1482; AVX1-LABEL: trunc_sub_const_v16i32_v16i8: 1483; AVX1: # BB#0: 1484; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm2 1485; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1486; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0 1487; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm3 1488; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1489; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm1, %xmm1 1490; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 1491; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 1492; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 1493; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1 1494; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 1495; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 1496; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0 1497; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 1498; AVX1-NEXT: vzeroupper 1499; AVX1-NEXT: retq 1500; 1501; AVX2-LABEL: trunc_sub_const_v16i32_v16i8: 1502; AVX2: # BB#0: 1503; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0 1504; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm1, %ymm1 1505; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 1506; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 1507; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 1508; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1509; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 1510; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 1511; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 1512; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 1513; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1514; AVX2-NEXT: vzeroupper 1515; AVX2-NEXT: retq 1516; 1517; AVX512-LABEL: trunc_sub_const_v16i32_v16i8: 1518; AVX512: # BB#0: 1519; AVX512-NEXT: vpsubd {{.*}}(%rip), %zmm0, %zmm0 1520; AVX512-NEXT: vpmovdb %zmm0, %xmm0 1521; AVX512-NEXT: retq 1522 %1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1523 %2 = trunc <16 x i32> %1 to <16 x i8> 1524 ret <16 x i8> %2 1525} 1526 1527define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind { 1528; SSE-LABEL: trunc_sub_const_v16i16_v16i8: 1529; SSE: # BB#0: 1530; SSE-NEXT: psubw {{.*}}(%rip), %xmm0 1531; SSE-NEXT: psubw {{.*}}(%rip), %xmm1 1532; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 1533; SSE-NEXT: pand %xmm2, %xmm1 1534; SSE-NEXT: pand %xmm2, %xmm0 1535; SSE-NEXT: packuswb %xmm1, %xmm0 1536; SSE-NEXT: retq 1537; 1538; AVX1-LABEL: trunc_sub_const_v16i16_v16i8: 1539; AVX1: # BB#0: 1540; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm1 1541; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1542; AVX1-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0 1543; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1544; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1545; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1546; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1547; AVX1-NEXT: vzeroupper 1548; 
AVX1-NEXT: retq 1549; 1550; AVX2-LABEL: trunc_sub_const_v16i16_v16i8: 1551; AVX2: # BB#0: 1552; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 1553; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1554; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 1555; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 1556; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 1557; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1558; AVX2-NEXT: vzeroupper 1559; AVX2-NEXT: retq 1560; 1561; AVX512F-LABEL: trunc_sub_const_v16i16_v16i8: 1562; AVX512F: # BB#0: 1563; AVX512F-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 1564; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1565; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1566; AVX512F-NEXT: retq 1567; 1568; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8: 1569; AVX512BW: # BB#0: 1570; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 1571; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1572; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1573; AVX512BW-NEXT: retq 1574; 1575; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8: 1576; AVX512DQ: # BB#0: 1577; AVX512DQ-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0 1578; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 1579; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1580; AVX512DQ-NEXT: retq 1581 %1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 1582 %2 = trunc <16 x i16> %1 to <16 x i8> 1583 ret <16 x i8> %2 1584} 1585 1586; 1587; mul 1588; 1589 1590define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind { 1591; SSE-LABEL: trunc_mul_v4i64_v4i32: 1592; SSE: # BB#0: 1593; SSE-NEXT: movdqa %xmm1, %xmm4 1594; SSE-NEXT: psrlq $32, %xmm4 1595; SSE-NEXT: pmuludq %xmm3, %xmm4 1596; SSE-NEXT: movdqa %xmm3, %xmm5 1597; SSE-NEXT: psrlq $32, %xmm5 1598; SSE-NEXT: pmuludq %xmm1, %xmm5 1599; SSE-NEXT: paddq %xmm4, %xmm5 1600; SSE-NEXT: psllq $32, %xmm5 1601; SSE-NEXT: pmuludq %xmm3, %xmm1 1602; SSE-NEXT: paddq %xmm5, %xmm1 1603; SSE-NEXT: movdqa %xmm0, %xmm3 1604; SSE-NEXT: psrlq $32, %xmm3 1605; SSE-NEXT: pmuludq %xmm2, %xmm3 1606; SSE-NEXT: movdqa %xmm2, %xmm4 1607; SSE-NEXT: psrlq $32, %xmm4 1608; SSE-NEXT: pmuludq %xmm0, %xmm4 1609; SSE-NEXT: paddq %xmm3, %xmm4 1610; SSE-NEXT: psllq $32, %xmm4 1611; SSE-NEXT: pmuludq %xmm2, %xmm0 1612; SSE-NEXT: paddq %xmm4, %xmm0 1613; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 1614; SSE-NEXT: retq 1615; 1616; AVX1-LABEL: trunc_mul_v4i64_v4i32: 1617; AVX1: # BB#0: 1618; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1619; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] 1620; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1621; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] 1622; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1623; AVX1-NEXT: vzeroupper 1624; AVX1-NEXT: retq 1625; 1626; AVX2-LABEL: trunc_mul_v4i64_v4i32: 1627; AVX2: # BB#0: 1628; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 1629; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 1630; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 1631; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 1632; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1633; AVX2-NEXT: vzeroupper 1634; AVX2-NEXT: retq 1635; 1636; AVX512F-LABEL: trunc_mul_v4i64_v4i32: 1637; AVX512F: # BB#0: 1638; AVX512F-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def> 1639; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 1640; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 1641; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 1642; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1643; AVX512F-NEXT: retq 1644; 1645; 
AVX512BW-LABEL: trunc_mul_v4i64_v4i32: 1646; AVX512BW: # BB#0: 1647; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def> 1648; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 1649; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 1650; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 1651; AVX512BW-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1652; AVX512BW-NEXT: retq 1653; 1654; AVX512DQ-LABEL: trunc_mul_v4i64_v4i32: 1655; AVX512DQ: # BB#0: 1656; AVX512DQ-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def> 1657; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 1658; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 1659; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 1660; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1661; AVX512DQ-NEXT: retq 1662 %1 = mul <4 x i64> %a0, %a1 1663 %2 = trunc <4 x i64> %1 to <4 x i32> 1664 ret <4 x i32> %2 1665} 1666 1667define <8 x i16> @trunc_mul_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind { 1668; SSE-LABEL: trunc_mul_v8i64_v8i16: 1669; SSE: # BB#0: 1670; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,2,2,3] 1671; SSE-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[0,1,0,2,4,5,6,7] 1672; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] 1673; SSE-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,1,0,2,4,5,6,7] 1674; SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] 1675; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] 1676; SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,2,2,3,4,5,6,7] 1677; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 1678; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7] 1679; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] 1680; SSE-NEXT: movsd {{.*#+}} xmm6 = xmm4[0],xmm6[1] 1681; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1682; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7] 1683; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1684; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7] 1685; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1686; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1687; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 1688; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1689; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1690; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1691; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 1692; SSE-NEXT: pmullw %xmm6, %xmm2 1693; SSE-NEXT: movdqa %xmm2, %xmm0 1694; SSE-NEXT: retq 1695; 1696; AVX1-LABEL: trunc_mul_v8i64_v8i16: 1697; AVX1: # BB#0: 1698; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1699; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 1700; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3],xmm4[4],xmm5[5,6,7] 1701; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm5[1,2,3],xmm3[4],xmm5[5,6,7] 1702; AVX1-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 1703; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 1704; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0],xmm5[1,2,3],xmm4[4],xmm5[5,6,7] 1705; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm5[1,2,3],xmm2[4],xmm5[5,6,7] 1706; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 1707; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 1708; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1709; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm5[1,2,3],xmm3[4],xmm5[5,6,7] 1710; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm5[1,2,3],xmm1[4],xmm5[5,6,7] 1711; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 1712; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1713; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm5[1,2,3],xmm3[4],xmm5[5,6,7] 1714; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm5[1,2,3],xmm0[4],xmm5[5,6,7] 1715; 
AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 1716; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1717; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1718; AVX1-NEXT: vzeroupper 1719; AVX1-NEXT: retq 1720; 1721; AVX2-LABEL: trunc_mul_v8i64_v8i16: 1722; AVX2: # BB#0: 1723; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7] 1724; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 1725; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7] 1726; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3] 1727; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 1728; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 1729; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2 1730; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 1731; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 1732; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 1733; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 1734; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 1735; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1736; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0 1737; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 1738; AVX2-NEXT: vpmullw %xmm2, %xmm0, %xmm0 1739; AVX2-NEXT: vzeroupper 1740; AVX2-NEXT: retq 1741; 1742; AVX512F-LABEL: trunc_mul_v8i64_v8i16: 1743; AVX512F: # BB#0: 1744; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 1745; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 1746; AVX512F-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1747; AVX512F-NEXT: retq 1748; 1749; AVX512BW-LABEL: trunc_mul_v8i64_v8i16: 1750; AVX512BW: # BB#0: 1751; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 1752; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 1753; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 1754; AVX512BW-NEXT: retq 1755; 1756; AVX512DQ-LABEL: trunc_mul_v8i64_v8i16: 1757; AVX512DQ: # BB#0: 1758; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0 1759; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0 1760; AVX512DQ-NEXT: retq 1761 %1 = mul <8 x i64> %a0, %a1 1762 %2 = trunc <8 x i64> %1 to <8 x i16> 1763 ret <8 x i16> %2 1764} 1765 1766define <8 x i16> @trunc_mul_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind { 1767; SSE-LABEL: trunc_mul_v8i32_v8i16: 1768; SSE: # BB#0: 1769; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1770; SSE-NEXT: pmuludq %xmm2, %xmm0 1771; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1772; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1773; SSE-NEXT: pmuludq %xmm4, %xmm2 1774; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1775; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1776; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1777; SSE-NEXT: pmuludq %xmm3, %xmm1 1778; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1779; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1780; SSE-NEXT: pmuludq %xmm2, %xmm3 1781; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] 1782; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1783; SSE-NEXT: pslld $16, %xmm1 1784; SSE-NEXT: psrad $16, %xmm1 1785; SSE-NEXT: pslld $16, %xmm0 1786; SSE-NEXT: psrad $16, %xmm0 1787; SSE-NEXT: packssdw %xmm1, %xmm0 1788; SSE-NEXT: retq 1789; 1790; AVX1-LABEL: trunc_mul_v8i32_v8i16: 1791; AVX1: # BB#0: 1792; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm2 1793; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1794; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1795; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1796; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1797; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 1798; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1 1799; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = 
xmm1[0],xmm0[0] 1800; AVX1-NEXT: vzeroupper 1801; AVX1-NEXT: retq 1802; 1803; AVX2-LABEL: trunc_mul_v8i32_v8i16: 1804; AVX2: # BB#0: 1805; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1806; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero 1807; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 1808; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1809; AVX2-NEXT: vzeroupper 1810; AVX2-NEXT: retq 1811; 1812; AVX512-LABEL: trunc_mul_v8i32_v8i16: 1813; AVX512: # BB#0: 1814; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1815; AVX512-NEXT: vpmovdw %zmm0, %ymm0 1816; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1817; AVX512-NEXT: retq 1818 %1 = mul <8 x i32> %a0, %a1 1819 %2 = trunc <8 x i32> %1 to <8 x i16> 1820 ret <8 x i16> %2 1821} 1822 1823define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind { 1824; SSE-LABEL: trunc_mul_v16i64_v16i8: 1825; SSE: # BB#0: 1826; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 1827; SSE-NEXT: movdqa %xmm0, %xmm9 1828; SSE-NEXT: psrlq $32, %xmm9 1829; SSE-NEXT: pmuludq %xmm8, %xmm9 1830; SSE-NEXT: movdqa %xmm8, %xmm10 1831; SSE-NEXT: psrlq $32, %xmm10 1832; SSE-NEXT: pmuludq %xmm0, %xmm10 1833; SSE-NEXT: paddq %xmm9, %xmm10 1834; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1835; SSE-NEXT: psllq $32, %xmm10 1836; SSE-NEXT: pmuludq %xmm8, %xmm0 1837; SSE-NEXT: paddq %xmm10, %xmm0 1838; SSE-NEXT: movdqa %xmm1, %xmm8 1839; SSE-NEXT: psrlq $32, %xmm8 1840; SSE-NEXT: pmuludq %xmm9, %xmm8 1841; SSE-NEXT: movdqa %xmm9, %xmm10 1842; SSE-NEXT: psrlq $32, %xmm10 1843; SSE-NEXT: pmuludq %xmm1, %xmm10 1844; SSE-NEXT: paddq %xmm8, %xmm10 1845; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 1846; SSE-NEXT: psllq $32, %xmm10 1847; SSE-NEXT: pmuludq %xmm9, %xmm1 1848; SSE-NEXT: paddq %xmm10, %xmm1 1849; SSE-NEXT: movdqa %xmm2, %xmm9 1850; SSE-NEXT: psrlq $32, %xmm9 1851; SSE-NEXT: pmuludq %xmm8, %xmm9 1852; SSE-NEXT: movdqa %xmm8, %xmm10 1853; SSE-NEXT: psrlq $32, %xmm10 1854; SSE-NEXT: pmuludq %xmm2, %xmm10 1855; SSE-NEXT: paddq %xmm9, %xmm10 1856; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1857; SSE-NEXT: psllq $32, %xmm10 1858; SSE-NEXT: pmuludq %xmm8, %xmm2 1859; SSE-NEXT: paddq %xmm10, %xmm2 1860; SSE-NEXT: movdqa %xmm3, %xmm8 1861; SSE-NEXT: psrlq $32, %xmm8 1862; SSE-NEXT: pmuludq %xmm9, %xmm8 1863; SSE-NEXT: movdqa %xmm9, %xmm10 1864; SSE-NEXT: psrlq $32, %xmm10 1865; SSE-NEXT: pmuludq %xmm3, %xmm10 1866; SSE-NEXT: paddq %xmm8, %xmm10 1867; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 1868; SSE-NEXT: psllq $32, %xmm10 1869; SSE-NEXT: pmuludq %xmm9, %xmm3 1870; SSE-NEXT: paddq %xmm10, %xmm3 1871; SSE-NEXT: movdqa %xmm4, %xmm9 1872; SSE-NEXT: psrlq $32, %xmm9 1873; SSE-NEXT: pmuludq %xmm8, %xmm9 1874; SSE-NEXT: movdqa %xmm8, %xmm10 1875; SSE-NEXT: psrlq $32, %xmm10 1876; SSE-NEXT: pmuludq %xmm4, %xmm10 1877; SSE-NEXT: paddq %xmm9, %xmm10 1878; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1879; SSE-NEXT: psllq $32, %xmm10 1880; SSE-NEXT: pmuludq %xmm8, %xmm4 1881; SSE-NEXT: paddq %xmm10, %xmm4 1882; SSE-NEXT: movdqa %xmm5, %xmm8 1883; SSE-NEXT: psrlq $32, %xmm8 1884; SSE-NEXT: pmuludq %xmm9, %xmm8 1885; SSE-NEXT: movdqa %xmm9, %xmm10 1886; SSE-NEXT: psrlq $32, %xmm10 1887; SSE-NEXT: pmuludq %xmm5, %xmm10 1888; SSE-NEXT: paddq %xmm8, %xmm10 1889; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 1890; SSE-NEXT: psllq $32, %xmm10 1891; SSE-NEXT: pmuludq %xmm9, %xmm5 1892; SSE-NEXT: paddq %xmm10, %xmm5 1893; SSE-NEXT: movdqa %xmm6, %xmm9 1894; SSE-NEXT: psrlq 
$32, %xmm9 1895; SSE-NEXT: pmuludq %xmm8, %xmm9 1896; SSE-NEXT: movdqa %xmm8, %xmm10 1897; SSE-NEXT: psrlq $32, %xmm10 1898; SSE-NEXT: pmuludq %xmm6, %xmm10 1899; SSE-NEXT: paddq %xmm9, %xmm10 1900; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 1901; SSE-NEXT: psllq $32, %xmm10 1902; SSE-NEXT: pmuludq %xmm8, %xmm6 1903; SSE-NEXT: paddq %xmm10, %xmm6 1904; SSE-NEXT: movdqa %xmm7, %xmm8 1905; SSE-NEXT: psrlq $32, %xmm8 1906; SSE-NEXT: pmuludq %xmm9, %xmm8 1907; SSE-NEXT: movdqa %xmm9, %xmm10 1908; SSE-NEXT: psrlq $32, %xmm10 1909; SSE-NEXT: pmuludq %xmm7, %xmm10 1910; SSE-NEXT: paddq %xmm8, %xmm10 1911; SSE-NEXT: pmuludq %xmm9, %xmm7 1912; SSE-NEXT: psllq $32, %xmm10 1913; SSE-NEXT: paddq %xmm10, %xmm7 1914; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 1915; SSE-NEXT: pand %xmm8, %xmm7 1916; SSE-NEXT: pand %xmm8, %xmm6 1917; SSE-NEXT: packuswb %xmm7, %xmm6 1918; SSE-NEXT: pand %xmm8, %xmm5 1919; SSE-NEXT: pand %xmm8, %xmm4 1920; SSE-NEXT: packuswb %xmm5, %xmm4 1921; SSE-NEXT: packuswb %xmm6, %xmm4 1922; SSE-NEXT: pand %xmm8, %xmm3 1923; SSE-NEXT: pand %xmm8, %xmm2 1924; SSE-NEXT: packuswb %xmm3, %xmm2 1925; SSE-NEXT: pand %xmm8, %xmm1 1926; SSE-NEXT: pand %xmm8, %xmm0 1927; SSE-NEXT: packuswb %xmm1, %xmm0 1928; SSE-NEXT: packuswb %xmm2, %xmm0 1929; SSE-NEXT: packuswb %xmm4, %xmm0 1930; SSE-NEXT: retq 1931; 1932; AVX1-LABEL: trunc_mul_v16i64_v16i8: 1933; AVX1: # BB#0: 1934; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm8 1935; AVX1-NEXT: vpmuludq %xmm4, %xmm8, %xmm8 1936; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm9 1937; AVX1-NEXT: vpmuludq %xmm9, %xmm0, %xmm9 1938; AVX1-NEXT: vpaddq %xmm8, %xmm9, %xmm8 1939; AVX1-NEXT: vpsllq $32, %xmm8, %xmm8 1940; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm9 1941; AVX1-NEXT: vpaddq %xmm8, %xmm9, %xmm8 1942; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm9 1943; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1944; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4 1945; AVX1-NEXT: vpmuludq %xmm9, %xmm4, %xmm10 1946; AVX1-NEXT: vpsrlq $32, %xmm9, %xmm4 1947; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm4 1948; AVX1-NEXT: vpaddq %xmm10, %xmm4, %xmm4 1949; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 1950; AVX1-NEXT: vpmuludq %xmm9, %xmm0, %xmm0 1951; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm9 1952; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4 1953; AVX1-NEXT: vpmuludq %xmm5, %xmm4, %xmm4 1954; AVX1-NEXT: vpsrlq $32, %xmm5, %xmm0 1955; AVX1-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 1956; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1957; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 1958; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm4 1959; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm10 1960; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm0 1961; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 1962; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm5 1963; AVX1-NEXT: vpmuludq %xmm0, %xmm5, %xmm5 1964; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm4 1965; AVX1-NEXT: vpmuludq %xmm4, %xmm1, %xmm4 1966; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4 1967; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 1968; AVX1-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 1969; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm1 1970; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm0 1971; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm0 1972; AVX1-NEXT: vpsrlq $32, %xmm6, %xmm4 1973; AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm4 1974; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0 1975; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 1976; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm4 1977; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm5 1978; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0 1979; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 1980; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm4 1981; AVX1-NEXT: vpmuludq %xmm0, %xmm4, %xmm4 1982; 
AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6 1983; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm6 1984; AVX1-NEXT: vpaddq %xmm4, %xmm6, %xmm4 1985; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 1986; AVX1-NEXT: vpmuludq %xmm0, %xmm2, %xmm0 1987; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1988; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm2 1989; AVX1-NEXT: vpmuludq %xmm7, %xmm2, %xmm2 1990; AVX1-NEXT: vpsrlq $32, %xmm7, %xmm4 1991; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm4 1992; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 1993; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 1994; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm4 1995; AVX1-NEXT: vpaddq %xmm2, %xmm4, %xmm2 1996; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm4 1997; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 1998; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm6 1999; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm6 2000; AVX1-NEXT: vpsrlq $32, %xmm4, %xmm7 2001; AVX1-NEXT: vpmuludq %xmm7, %xmm3, %xmm7 2002; AVX1-NEXT: vpaddq %xmm6, %xmm7, %xmm6 2003; AVX1-NEXT: vpsllq $32, %xmm6, %xmm6 2004; AVX1-NEXT: vpmuludq %xmm4, %xmm3, %xmm3 2005; AVX1-NEXT: vpaddq %xmm6, %xmm3, %xmm3 2006; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 2007; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 2008; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 2009; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 2010; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 2011; AVX1-NEXT: vpand %xmm4, %xmm5, %xmm3 2012; AVX1-NEXT: vpackuswb %xmm0, %xmm3, %xmm0 2013; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 2014; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 2015; AVX1-NEXT: vpand %xmm4, %xmm10, %xmm2 2016; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 2017; AVX1-NEXT: vpand %xmm4, %xmm9, %xmm2 2018; AVX1-NEXT: vpand %xmm4, %xmm8, %xmm3 2019; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 2020; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 2021; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 2022; AVX1-NEXT: vzeroupper 2023; AVX1-NEXT: retq 2024; 2025; AVX2-LABEL: trunc_mul_v16i64_v16i8: 2026; AVX2: # BB#0: 2027; AVX2-NEXT: vpshufd {{.*#+}} ymm7 = ymm7[0,2,2,3,4,6,6,7] 2028; AVX2-NEXT: vpermq {{.*#+}} ymm7 = ymm7[0,2,2,3] 2029; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7] 2030; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3] 2031; AVX2-NEXT: vpmulld %xmm7, %xmm3, %xmm3 2032; AVX2-NEXT: vpshufd {{.*#+}} ymm6 = ymm6[0,2,2,3,4,6,6,7] 2033; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,2,2,3] 2034; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7] 2035; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 2036; AVX2-NEXT: vpmulld %xmm6, %xmm2, %xmm2 2037; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 2038; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 2039; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2 2040; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 2041; AVX2-NEXT: vmovdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2042; AVX2-NEXT: vpshufb %xmm6, %xmm2, %xmm2 2043; AVX2-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,2,2,3,4,6,6,7] 2044; AVX2-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,2,2,3] 2045; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 2046; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 2047; AVX2-NEXT: vpmulld %xmm5, %xmm1, %xmm1 2048; AVX2-NEXT: vpshufd {{.*#+}} ymm4 = ymm4[0,2,2,3,4,6,6,7] 2049; AVX2-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,2,3] 2050; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2051; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2052; AVX2-NEXT: vpmulld %xmm4, %xmm0, %xmm0 2053; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2054; AVX2-NEXT: vpshufb %ymm3, %ymm0, 
%ymm0 2055; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2056; AVX2-NEXT: vpshufb %xmm6, %xmm0, %xmm0 2057; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2058; AVX2-NEXT: vzeroupper 2059; AVX2-NEXT: retq 2060; 2061; AVX512F-LABEL: trunc_mul_v16i64_v16i8: 2062; AVX512F: # BB#0: 2063; AVX512F-NEXT: vpmovqd %zmm3, %ymm3 2064; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 2065; AVX512F-NEXT: vpmulld %ymm3, %ymm1, %ymm1 2066; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 2067; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 2068; AVX512F-NEXT: vpmulld %ymm2, %ymm0, %ymm0 2069; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 2070; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2071; AVX512F-NEXT: retq 2072; 2073; AVX512BW-LABEL: trunc_mul_v16i64_v16i8: 2074; AVX512BW: # BB#0: 2075; AVX512BW-NEXT: vpmovqd %zmm3, %ymm3 2076; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 2077; AVX512BW-NEXT: vpmulld %ymm3, %ymm1, %ymm1 2078; AVX512BW-NEXT: vpmovqd %zmm2, %ymm2 2079; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 2080; AVX512BW-NEXT: vpmulld %ymm2, %ymm0, %ymm0 2081; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 2082; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 2083; AVX512BW-NEXT: retq 2084; 2085; AVX512DQ-LABEL: trunc_mul_v16i64_v16i8: 2086; AVX512DQ: # BB#0: 2087; AVX512DQ-NEXT: vpmullq %zmm3, %zmm1, %zmm1 2088; AVX512DQ-NEXT: vpmullq %zmm2, %zmm0, %zmm0 2089; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 2090; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 2091; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 2092; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2093; AVX512DQ-NEXT: retq 2094 %1 = mul <16 x i64> %a0, %a1 2095 %2 = trunc <16 x i64> %1 to <16 x i8> 2096 ret <16 x i8> %2 2097} 2098 2099define <16 x i8> @trunc_mul_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind { 2100; SSE-LABEL: trunc_mul_v16i32_v16i8: 2101; SSE: # BB#0: 2102; SSE-NEXT: pshufd {{.*#+}} xmm8 = xmm0[1,1,3,3] 2103; SSE-NEXT: pmuludq %xmm4, %xmm0 2104; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2105; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2106; SSE-NEXT: pmuludq %xmm8, %xmm4 2107; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2108; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 2109; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 2110; SSE-NEXT: pmuludq %xmm5, %xmm1 2111; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2112; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2113; SSE-NEXT: pmuludq %xmm4, %xmm5 2114; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] 2115; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] 2116; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2117; SSE-NEXT: pmuludq %xmm6, %xmm2 2118; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2119; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 2120; SSE-NEXT: pmuludq %xmm4, %xmm5 2121; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] 2122; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2123; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2124; SSE-NEXT: pmuludq %xmm7, %xmm3 2125; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2126; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 2127; SSE-NEXT: pmuludq %xmm4, %xmm5 2128; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] 2129; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 2130; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 2131; SSE-NEXT: pand %xmm4, %xmm3 2132; SSE-NEXT: pand %xmm4, %xmm2 2133; SSE-NEXT: packuswb %xmm3, %xmm2 2134; SSE-NEXT: pand %xmm4, %xmm1 2135; SSE-NEXT: pand %xmm4, %xmm0 2136; SSE-NEXT: packuswb %xmm1, %xmm0 2137; SSE-NEXT: packuswb %xmm2, %xmm0 
2138; SSE-NEXT: retq 2139; 2140; AVX1-LABEL: trunc_mul_v16i32_v16i8: 2141; AVX1: # BB#0: 2142; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm4 2143; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 2144; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2145; AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 2146; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm2 2147; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 2148; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2149; AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1 2150; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 2151; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 2152; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 2153; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 2154; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 2155; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm2 2156; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0 2157; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2158; AVX1-NEXT: vzeroupper 2159; AVX1-NEXT: retq 2160; 2161; AVX2-LABEL: trunc_mul_v16i32_v16i8: 2162; AVX2: # BB#0: 2163; AVX2-NEXT: vpmulld %ymm2, %ymm0, %ymm0 2164; AVX2-NEXT: vpmulld %ymm3, %ymm1, %ymm1 2165; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 2166; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2167; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 2168; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2169; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2170; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 2171; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2172; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2173; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2174; AVX2-NEXT: vzeroupper 2175; AVX2-NEXT: retq 2176; 2177; AVX512-LABEL: trunc_mul_v16i32_v16i8: 2178; AVX512: # BB#0: 2179; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 2180; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2181; AVX512-NEXT: retq 2182 %1 = mul <16 x i32> %a0, %a1 2183 %2 = trunc <16 x i32> %1 to <16 x i8> 2184 ret <16 x i8> %2 2185} 2186 2187define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind { 2188; SSE-LABEL: trunc_mul_v16i16_v16i8: 2189; SSE: # BB#0: 2190; SSE-NEXT: pmullw %xmm2, %xmm0 2191; SSE-NEXT: pmullw %xmm3, %xmm1 2192; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2193; SSE-NEXT: pand %xmm2, %xmm1 2194; SSE-NEXT: pand %xmm2, %xmm0 2195; SSE-NEXT: packuswb %xmm1, %xmm0 2196; SSE-NEXT: retq 2197; 2198; AVX1-LABEL: trunc_mul_v16i16_v16i8: 2199; AVX1: # BB#0: 2200; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm2 2201; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2202; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2203; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 2204; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2205; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 2206; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm1 2207; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2208; AVX1-NEXT: vzeroupper 2209; AVX1-NEXT: retq 2210; 2211; AVX2-LABEL: trunc_mul_v16i16_v16i8: 2212; AVX2: # BB#0: 2213; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2214; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2215; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2216; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2217; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2218; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2219; AVX2-NEXT: vzeroupper 2220; AVX2-NEXT: retq 2221; 2222; AVX512F-LABEL: trunc_mul_v16i16_v16i8: 2223; AVX512F: # BB#0: 2224; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2225; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 2226; AVX512F-NEXT: 
vpmovdb %zmm0, %xmm0 2227; AVX512F-NEXT: retq 2228; 2229; AVX512BW-LABEL: trunc_mul_v16i16_v16i8: 2230; AVX512BW: # BB#0: 2231; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2232; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 2233; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 2234; AVX512BW-NEXT: retq 2235; 2236; AVX512DQ-LABEL: trunc_mul_v16i16_v16i8: 2237; AVX512DQ: # BB#0: 2238; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0 2239; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 2240; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2241; AVX512DQ-NEXT: retq 2242 %1 = mul <16 x i16> %a0, %a1 2243 %2 = trunc <16 x i16> %1 to <16 x i8> 2244 ret <16 x i8> %2 2245} 2246 2247; 2248; mul to constant 2249; 2250 2251define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind { 2252; SSE-LABEL: trunc_mul_const_v4i64_v4i32: 2253; SSE: # BB#0: 2254; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2,3] 2255; SSE-NEXT: movdqa %xmm1, %xmm3 2256; SSE-NEXT: pmuludq %xmm2, %xmm3 2257; SSE-NEXT: psrlq $32, %xmm1 2258; SSE-NEXT: pmuludq %xmm2, %xmm1 2259; SSE-NEXT: psllq $32, %xmm1 2260; SSE-NEXT: paddq %xmm3, %xmm1 2261; SSE-NEXT: movl $1, %eax 2262; SSE-NEXT: movd %rax, %xmm2 2263; SSE-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] 2264; SSE-NEXT: movdqa %xmm0, %xmm3 2265; SSE-NEXT: pmuludq %xmm2, %xmm3 2266; SSE-NEXT: psrlq $32, %xmm0 2267; SSE-NEXT: pmuludq %xmm2, %xmm0 2268; SSE-NEXT: psllq $32, %xmm0 2269; SSE-NEXT: paddq %xmm3, %xmm0 2270; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2271; SSE-NEXT: retq 2272; 2273; AVX1-LABEL: trunc_mul_const_v4i64_v4i32: 2274; AVX1: # BB#0: 2275; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2276; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2277; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2278; AVX1-NEXT: vzeroupper 2279; AVX1-NEXT: retq 2280; 2281; AVX2-LABEL: trunc_mul_const_v4i64_v4i32: 2282; AVX2: # BB#0: 2283; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2284; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2285; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2286; AVX2-NEXT: vzeroupper 2287; AVX2-NEXT: retq 2288; 2289; AVX512-LABEL: trunc_mul_const_v4i64_v4i32: 2290; AVX512: # BB#0: 2291; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 2292; AVX512-NEXT: vpmovqd %zmm0, %ymm0 2293; AVX512-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2294; AVX512-NEXT: retq 2295 %1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3> 2296 %2 = trunc <4 x i64> %1 to <4 x i32> 2297 ret <4 x i32> %2 2298} 2299 2300define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind { 2301; SSE-LABEL: trunc_mul_const_v8i64_v8i16: 2302; SSE: # BB#0: 2303; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2304; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7] 2305; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2306; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7] 2307; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2308; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2309; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 2310; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2311; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2312; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2313; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2314; SSE-NEXT: pmullw {{.*}}(%rip), %xmm2 2315; SSE-NEXT: movdqa %xmm2, %xmm0 2316; SSE-NEXT: retq 2317; 2318; AVX1-LABEL: trunc_mul_const_v8i64_v8i16: 2319; AVX1: # BB#0: 2320; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2321; AVX1-NEXT: vpxor 
%xmm3, %xmm3, %xmm3 2322; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] 2323; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] 2324; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 2325; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2326; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] 2327; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] 2328; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 2329; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2330; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2331; AVX1-NEXT: vzeroupper 2332; AVX1-NEXT: retq 2333; 2334; AVX2-LABEL: trunc_mul_const_v8i64_v8i16: 2335; AVX2: # BB#0: 2336; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2337; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2338; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 2339; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 2340; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2341; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero 2342; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2343; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2344; AVX2-NEXT: vzeroupper 2345; AVX2-NEXT: retq 2346; 2347; AVX512-LABEL: trunc_mul_const_v8i64_v8i16: 2348; AVX512: # BB#0: 2349; AVX512-NEXT: vpmovqw %zmm0, %xmm0 2350; AVX512-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2351; AVX512-NEXT: retq 2352 %1 = mul <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7> 2353 %2 = trunc <8 x i64> %1 to <8 x i16> 2354 ret <8 x i16> %2 2355} 2356 2357define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { 2358; SSE-LABEL: trunc_mul_const_v8i32_v8i16: 2359; SSE: # BB#0: 2360; SSE-NEXT: pslld $16, %xmm1 2361; SSE-NEXT: psrad $16, %xmm1 2362; SSE-NEXT: pslld $16, %xmm0 2363; SSE-NEXT: psrad $16, %xmm0 2364; SSE-NEXT: packssdw %xmm1, %xmm0 2365; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0 2366; SSE-NEXT: retq 2367; 2368; AVX1-LABEL: trunc_mul_const_v8i32_v8i16: 2369; AVX1: # BB#0: 2370; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2371; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 2372; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2373; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2374; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2375; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2376; AVX1-NEXT: vzeroupper 2377; AVX1-NEXT: retq 2378; 2379; AVX2-LABEL: trunc_mul_const_v8i32_v8i16: 2380; AVX2: # BB#0: 2381; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero 2382; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2383; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2384; AVX2-NEXT: vzeroupper 2385; AVX2-NEXT: retq 2386; 2387; AVX512-LABEL: trunc_mul_const_v8i32_v8i16: 2388; AVX512: # BB#0: 2389; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 2390; AVX512-NEXT: vpmovdw %zmm0, %ymm0 2391; AVX512-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2392; AVX512-NEXT: retq 2393 %1 = mul <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2394 %2 = trunc <8 x i32> %1 to <8 x i16> 2395 ret <8 x i16> %2 2396} 2397 2398define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind { 2399; SSE-LABEL: trunc_mul_const_v16i64_v16i8: 2400; SSE: # BB#0: 2401; SSE-NEXT: movl $1, %eax 2402; SSE-NEXT: movd %rax, %xmm8 2403; 
SSE-NEXT: pslldq {{.*#+}} xmm8 = zero,zero,zero,zero,zero,zero,zero,zero,xmm8[0,1,2,3,4,5,6,7] 2404; SSE-NEXT: movdqa %xmm0, %xmm9 2405; SSE-NEXT: pmuludq %xmm8, %xmm9 2406; SSE-NEXT: psrlq $32, %xmm0 2407; SSE-NEXT: pmuludq %xmm8, %xmm0 2408; SSE-NEXT: psllq $32, %xmm0 2409; SSE-NEXT: paddq %xmm9, %xmm0 2410; SSE-NEXT: movdqa {{.*#+}} xmm8 = [2,3] 2411; SSE-NEXT: movdqa %xmm1, %xmm9 2412; SSE-NEXT: pmuludq %xmm8, %xmm9 2413; SSE-NEXT: psrlq $32, %xmm1 2414; SSE-NEXT: pmuludq %xmm8, %xmm1 2415; SSE-NEXT: psllq $32, %xmm1 2416; SSE-NEXT: paddq %xmm9, %xmm1 2417; SSE-NEXT: movdqa {{.*#+}} xmm8 = [4,5] 2418; SSE-NEXT: movdqa %xmm2, %xmm9 2419; SSE-NEXT: pmuludq %xmm8, %xmm9 2420; SSE-NEXT: psrlq $32, %xmm2 2421; SSE-NEXT: pmuludq %xmm8, %xmm2 2422; SSE-NEXT: psllq $32, %xmm2 2423; SSE-NEXT: paddq %xmm9, %xmm2 2424; SSE-NEXT: movdqa {{.*#+}} xmm8 = [6,7] 2425; SSE-NEXT: movdqa %xmm3, %xmm9 2426; SSE-NEXT: pmuludq %xmm8, %xmm9 2427; SSE-NEXT: psrlq $32, %xmm3 2428; SSE-NEXT: pmuludq %xmm8, %xmm3 2429; SSE-NEXT: psllq $32, %xmm3 2430; SSE-NEXT: paddq %xmm9, %xmm3 2431; SSE-NEXT: movdqa {{.*#+}} xmm8 = [8,9] 2432; SSE-NEXT: movdqa %xmm4, %xmm9 2433; SSE-NEXT: pmuludq %xmm8, %xmm9 2434; SSE-NEXT: psrlq $32, %xmm4 2435; SSE-NEXT: pmuludq %xmm8, %xmm4 2436; SSE-NEXT: psllq $32, %xmm4 2437; SSE-NEXT: paddq %xmm9, %xmm4 2438; SSE-NEXT: movdqa {{.*#+}} xmm8 = [10,11] 2439; SSE-NEXT: movdqa %xmm5, %xmm9 2440; SSE-NEXT: pmuludq %xmm8, %xmm9 2441; SSE-NEXT: psrlq $32, %xmm5 2442; SSE-NEXT: pmuludq %xmm8, %xmm5 2443; SSE-NEXT: psllq $32, %xmm5 2444; SSE-NEXT: paddq %xmm9, %xmm5 2445; SSE-NEXT: movdqa {{.*#+}} xmm8 = [12,13] 2446; SSE-NEXT: movdqa %xmm6, %xmm9 2447; SSE-NEXT: pmuludq %xmm8, %xmm9 2448; SSE-NEXT: psrlq $32, %xmm6 2449; SSE-NEXT: pmuludq %xmm8, %xmm6 2450; SSE-NEXT: psllq $32, %xmm6 2451; SSE-NEXT: paddq %xmm9, %xmm6 2452; SSE-NEXT: movdqa {{.*#+}} xmm8 = [14,15] 2453; SSE-NEXT: movdqa %xmm7, %xmm9 2454; SSE-NEXT: pmuludq %xmm8, %xmm9 2455; SSE-NEXT: psrlq $32, %xmm7 2456; SSE-NEXT: pmuludq %xmm8, %xmm7 2457; SSE-NEXT: psllq $32, %xmm7 2458; SSE-NEXT: paddq %xmm9, %xmm7 2459; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 2460; SSE-NEXT: pand %xmm8, %xmm7 2461; SSE-NEXT: pand %xmm8, %xmm6 2462; SSE-NEXT: packuswb %xmm7, %xmm6 2463; SSE-NEXT: pand %xmm8, %xmm5 2464; SSE-NEXT: pand %xmm8, %xmm4 2465; SSE-NEXT: packuswb %xmm5, %xmm4 2466; SSE-NEXT: packuswb %xmm6, %xmm4 2467; SSE-NEXT: pand %xmm8, %xmm3 2468; SSE-NEXT: pand %xmm8, %xmm2 2469; SSE-NEXT: packuswb %xmm3, %xmm2 2470; SSE-NEXT: pand %xmm8, %xmm1 2471; SSE-NEXT: pand %xmm8, %xmm0 2472; SSE-NEXT: packuswb %xmm1, %xmm0 2473; SSE-NEXT: packuswb %xmm2, %xmm0 2474; SSE-NEXT: packuswb %xmm4, %xmm0 2475; SSE-NEXT: retq 2476; 2477; AVX1-LABEL: trunc_mul_const_v16i64_v16i8: 2478; AVX1: # BB#0: 2479; AVX1-NEXT: movl $1, %eax 2480; AVX1-NEXT: vmovq %rax, %xmm4 2481; AVX1-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] 2482; AVX1-NEXT: vpmuludq %xmm4, %xmm0, %xmm5 2483; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm6 2484; AVX1-NEXT: vpmuludq %xmm4, %xmm6, %xmm4 2485; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 2486; AVX1-NEXT: vpaddq %xmm4, %xmm5, %xmm8 2487; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2488; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [2,3] 2489; AVX1-NEXT: vpmuludq %xmm5, %xmm0, %xmm6 2490; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 2491; AVX1-NEXT: vpmuludq %xmm5, %xmm0, %xmm0 2492; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 2493; AVX1-NEXT: vpaddq %xmm0, %xmm6, %xmm9 2494; AVX1-NEXT: vmovdqa {{.*#+}} 
xmm5 = [4,5] 2495; AVX1-NEXT: vpmuludq %xmm5, %xmm1, %xmm6 2496; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm7 2497; AVX1-NEXT: vpmuludq %xmm5, %xmm7, %xmm5 2498; AVX1-NEXT: vpsllq $32, %xmm5, %xmm5 2499; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5 2500; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2501; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [6,7] 2502; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm7 2503; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 2504; AVX1-NEXT: vpmuludq %xmm6, %xmm1, %xmm1 2505; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 2506; AVX1-NEXT: vpaddq %xmm1, %xmm7, %xmm1 2507; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [8,9] 2508; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm7 2509; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm4 2510; AVX1-NEXT: vpmuludq %xmm6, %xmm4, %xmm4 2511; AVX1-NEXT: vpsllq $32, %xmm4, %xmm4 2512; AVX1-NEXT: vpaddq %xmm4, %xmm7, %xmm4 2513; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 2514; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [10,11] 2515; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm7 2516; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2 2517; AVX1-NEXT: vpmuludq %xmm6, %xmm2, %xmm2 2518; AVX1-NEXT: vpsllq $32, %xmm2, %xmm2 2519; AVX1-NEXT: vpaddq %xmm2, %xmm7, %xmm2 2520; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [12,13] 2521; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm7 2522; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm0 2523; AVX1-NEXT: vpmuludq %xmm6, %xmm0, %xmm0 2524; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 2525; AVX1-NEXT: vpaddq %xmm0, %xmm7, %xmm0 2526; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 2527; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [14,15] 2528; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm7 2529; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm3 2530; AVX1-NEXT: vpmuludq %xmm6, %xmm3, %xmm3 2531; AVX1-NEXT: vpsllq $32, %xmm3, %xmm3 2532; AVX1-NEXT: vpaddq %xmm3, %xmm7, %xmm3 2533; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] 2534; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 2535; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0 2536; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 2537; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2 2538; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm3 2539; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 2540; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0 2541; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1 2542; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm2 2543; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 2544; AVX1-NEXT: vpand %xmm6, %xmm9, %xmm2 2545; AVX1-NEXT: vpand %xmm6, %xmm8, %xmm3 2546; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 2547; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 2548; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0 2549; AVX1-NEXT: vzeroupper 2550; AVX1-NEXT: retq 2551; 2552; AVX2-LABEL: trunc_mul_const_v16i64_v16i8: 2553; AVX2: # BB#0: 2554; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7] 2555; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 2556; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm2 2557; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7] 2558; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3] 2559; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm3, %xmm3 2560; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 2561; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 2562; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2 2563; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3] 2564; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2565; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 2566; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2567; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2568; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2569; AVX2-NEXT: vpshufd {{.*#+}} ymm1 
= ymm1[0,2,2,3,4,6,6,7] 2570; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 2571; AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2572; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2573; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0 2574; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2575; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 2576; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2577; AVX2-NEXT: vzeroupper 2578; AVX2-NEXT: retq 2579; 2580; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8: 2581; AVX512F: # BB#0: 2582; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 2583; AVX512F-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0 2584; AVX512F-NEXT: vpmovqd %zmm1, %ymm1 2585; AVX512F-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1 2586; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 2587; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2588; AVX512F-NEXT: retq 2589; 2590; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8: 2591; AVX512BW: # BB#0: 2592; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 2593; AVX512BW-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0 2594; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 2595; AVX512BW-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1 2596; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 2597; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 2598; AVX512BW-NEXT: retq 2599; 2600; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8: 2601; AVX512DQ: # BB#0: 2602; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 2603; AVX512DQ-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0 2604; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 2605; AVX512DQ-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1 2606; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 2607; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2608; AVX512DQ-NEXT: retq 2609 %1 = mul <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15> 2610 %2 = trunc <16 x i64> %1 to <16 x i8> 2611 ret <16 x i8> %2 2612} 2613 2614define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { 2615; SSE-LABEL: trunc_mul_const_v16i32_v16i8: 2616; SSE: # BB#0: 2617; SSE-NEXT: movdqa {{.*#+}} xmm4 = [0,1,2,3] 2618; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 2619; SSE-NEXT: pmuludq %xmm4, %xmm0 2620; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2621; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2622; SSE-NEXT: pmuludq %xmm5, %xmm4 2623; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2624; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 2625; SSE-NEXT: movdqa {{.*#+}} xmm4 = [4,5,6,7] 2626; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3] 2627; SSE-NEXT: pmuludq %xmm4, %xmm1 2628; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2629; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2630; SSE-NEXT: pmuludq %xmm5, %xmm4 2631; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2632; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] 2633; SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,9,10,11] 2634; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3] 2635; SSE-NEXT: pmuludq %xmm4, %xmm2 2636; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2637; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2638; SSE-NEXT: pmuludq %xmm5, %xmm4 2639; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2640; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2641; SSE-NEXT: movdqa {{.*#+}} xmm4 = [12,13,14,15] 2642; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3] 2643; SSE-NEXT: pmuludq %xmm4, %xmm3 2644; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2645; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2646; SSE-NEXT: pmuludq %xmm5, %xmm4 2647; SSE-NEXT: pshufd 
{{.*#+}} xmm4 = xmm4[0,2,2,3] 2648; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 2649; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 2650; SSE-NEXT: pand %xmm4, %xmm3 2651; SSE-NEXT: pand %xmm4, %xmm2 2652; SSE-NEXT: packuswb %xmm3, %xmm2 2653; SSE-NEXT: pand %xmm4, %xmm1 2654; SSE-NEXT: pand %xmm4, %xmm0 2655; SSE-NEXT: packuswb %xmm1, %xmm0 2656; SSE-NEXT: packuswb %xmm2, %xmm0 2657; SSE-NEXT: retq 2658; 2659; AVX1-LABEL: trunc_mul_const_v16i32_v16i8: 2660; AVX1: # BB#0: 2661; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2662; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2663; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2664; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm3 2665; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2666; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2667; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 2668; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1 2669; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 2670; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1 2671; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0 2672; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2 2673; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0 2674; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2675; AVX1-NEXT: vzeroupper 2676; AVX1-NEXT: retq 2677; 2678; AVX2-LABEL: trunc_mul_const_v16i32_v16i8: 2679; AVX2: # BB#0: 2680; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128] 2681; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 2682; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 2683; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1 2684; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2685; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 2686; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0 2687; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2688; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2689; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 2690; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2691; AVX2-NEXT: vzeroupper 2692; AVX2-NEXT: retq 2693; 2694; AVX512-LABEL: trunc_mul_const_v16i32_v16i8: 2695; AVX512: # BB#0: 2696; AVX512-NEXT: vpmulld {{.*}}(%rip), %zmm0, %zmm0 2697; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2698; AVX512-NEXT: retq 2699 %1 = mul <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2700 %2 = trunc <16 x i32> %1 to <16 x i8> 2701 ret <16 x i8> %2 2702} 2703 2704define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { 2705; SSE-LABEL: trunc_mul_const_v16i16_v16i8: 2706; SSE: # BB#0: 2707; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0 2708; SSE-NEXT: pmullw {{.*}}(%rip), %xmm1 2709; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 2710; SSE-NEXT: pand %xmm2, %xmm1 2711; SSE-NEXT: pand %xmm2, %xmm0 2712; SSE-NEXT: packuswb %xmm1, %xmm0 2713; SSE-NEXT: retq 2714; 2715; AVX1-LABEL: trunc_mul_const_v16i16_v16i8: 2716; AVX1: # BB#0: 2717; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1 2718; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2719; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0 2720; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2721; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2722; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2723; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2724; AVX1-NEXT: vzeroupper 2725; AVX1-NEXT: retq 2726; 2727; AVX2-LABEL: trunc_mul_const_v16i16_v16i8: 2728; AVX2: # BB#0: 2729; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, 
%ymm0 2730; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 2731; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> 2732; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 2733; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 2734; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2735; AVX2-NEXT: vzeroupper 2736; AVX2-NEXT: retq 2737; 2738; AVX512F-LABEL: trunc_mul_const_v16i16_v16i8: 2739; AVX512F: # BB#0: 2740; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 2741; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 2742; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2743; AVX512F-NEXT: retq 2744; 2745; AVX512BW-LABEL: trunc_mul_const_v16i16_v16i8: 2746; AVX512BW: # BB#0: 2747; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 2748; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 2749; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 2750; AVX512BW-NEXT: retq 2751; 2752; AVX512DQ-LABEL: trunc_mul_const_v16i16_v16i8: 2753; AVX512DQ: # BB#0: 2754; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0 2755; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 2756; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 2757; AVX512DQ-NEXT: retq 2758 %1 = mul <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> 2759 %2 = trunc <16 x i16> %1 to <16 x i8> 2760 ret <16 x i8> %2 2761} 2762 2763; 2764; and 2765; 2766 2767define <4 x i32> @trunc_and_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind { 2768; SSE-LABEL: trunc_and_v4i64_v4i32: 2769; SSE: # BB#0: 2770; SSE-NEXT: andps %xmm3, %xmm1 2771; SSE-NEXT: andps %xmm2, %xmm0 2772; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2773; SSE-NEXT: retq 2774; 2775; AVX1-LABEL: trunc_and_v4i64_v4i32: 2776; AVX1: # BB#0: 2777; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 2778; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2779; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] 2780; AVX1-NEXT: vzeroupper 2781; AVX1-NEXT: retq 2782; 2783; AVX2-LABEL: trunc_and_v4i64_v4i32: 2784; AVX2: # BB#0: 2785; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2786; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 2787; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 2788; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 2789; AVX2-NEXT: vzeroupper 2790; AVX2-NEXT: retq 2791; 2792; AVX512-LABEL: trunc_and_v4i64_v4i32: 2793; AVX512: # BB#0: 2794; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 2795; AVX512-NEXT: vpmovqd %zmm0, %ymm0 2796; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 2797; AVX512-NEXT: retq 2798 %1 = and <4 x i64> %a0, %a1 2799 %2 = trunc <4 x i64> %1 to <4 x i32> 2800 ret <4 x i32> %2 2801} 2802 2803define <8 x i16> @trunc_and_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind { 2804; SSE-LABEL: trunc_and_v8i64_v8i16: 2805; SSE: # BB#0: 2806; SSE-NEXT: pand %xmm4, %xmm0 2807; SSE-NEXT: pand %xmm5, %xmm1 2808; SSE-NEXT: pand %xmm6, %xmm2 2809; SSE-NEXT: pand %xmm7, %xmm3 2810; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2811; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7] 2812; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2813; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7] 2814; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2815; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2816; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7] 2817; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2818; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2819; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2820; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1] 2821; 
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_and_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; SSE-LABEL: trunc_and_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = and <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_and_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: pand {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vandps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm7, %ymm3, %ymm3
; AVX2-NEXT: vpand %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_and_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpandq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = and <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_and_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm3, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: pand %xmm2, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm1, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm5, %xmm0
; SSE-NEXT: packuswb %xmm6, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_and_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm3, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_and_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = and <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; and to constant
;
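; Note: in the constant cases below the logic op should be folded through the
; truncate, so the mask is applied to the already-narrowed vector as a
; RIP-relative memory operand.
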
define <4 x i32> @trunc_and_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_const_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_and_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT: andpd {{.*}}(%rip), %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_const_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_and_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_const_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_and_const_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_const_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_const_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = and <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_and_const_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = and <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_and_const_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_and_const_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_and_const_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_and_const_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_const_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_const_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = and <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; xor
;
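; The variable xor cases should lower like the and cases above: the logic op
; stays at the wide type and the truncate reuses the same shuffle/pack (or
; AVX512 vpmov) sequences.
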
define <4 x i32> @trunc_xor_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; SSE-LABEL: trunc_xor_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm3, %xmm1
; SSE-NEXT: xorps %xmm2, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = xor <4 x i64> %a0, %a1
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_xor_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; SSE-LABEL: trunc_xor_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm4, %xmm0
; SSE-NEXT: pxor %xmm5, %xmm1
; SSE-NEXT: pxor %xmm6, %xmm2
; SSE-NEXT: pxor %xmm7, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <8 x i64> %a0, %a1
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_xor_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; SSE-LABEL: trunc_xor_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
  %1 = xor <8 x i32> %a0, %a1
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_xor_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: pxor {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vxorps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm7, %ymm3, %ymm3
; AVX2-NEXT: vpxor %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_xor_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpxorq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vpxorq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxorq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vpxorq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpxorq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT: vpxorq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = xor <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_xor_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm4, %xmm0
; SSE-NEXT: pxor %xmm5, %xmm1
; SSE-NEXT: pxor %xmm6, %xmm2
; SSE-NEXT: pxor %xmm7, %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vxorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_xor_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_xor_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = xor <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; xor to constant
;
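; As with 'and to constant', the xor with a constant should be folded past
; the truncate and applied to the narrowed result.
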
define <4 x i32> @trunc_xor_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: xorps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vxorps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_const_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_xor_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT: xorpd {{.*}}(%rip), %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_const_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <8 x i16> @trunc_xor_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_const_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_const_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = xor <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_xor_const_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = xor <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_xor_const_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: pxor {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_const_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = xor <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; or
;
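; The or cases repeat the same pattern. For the v16i64 tests the second
; operand no longer fits in registers, so the SSE code reads it from the
; stack.
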
4296; AVX1-LABEL: trunc_or_v8i64_v8i16: 4297; AVX1: # BB#0: 4298; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 4299; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 4300; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 4301; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4302; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] 4303; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] 4304; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 4305; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 4306; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] 4307; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] 4308; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 4309; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 4310; AVX1-NEXT: vzeroupper 4311; AVX1-NEXT: retq 4312; 4313; AVX2-LABEL: trunc_or_v8i64_v8i16: 4314; AVX2: # BB#0: 4315; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1 4316; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 4317; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] 4318; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 4319; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] 4320; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] 4321; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4322; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero 4323; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 4324; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 4325; AVX2-NEXT: vzeroupper 4326; AVX2-NEXT: retq 4327; 4328; AVX512-LABEL: trunc_or_v8i64_v8i16: 4329; AVX512: # BB#0: 4330; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 4331; AVX512-NEXT: vpmovqw %zmm0, %xmm0 4332; AVX512-NEXT: retq 4333 %1 = or <8 x i64> %a0, %a1 4334 %2 = trunc <8 x i64> %1 to <8 x i16> 4335 ret <8 x i16> %2 4336} 4337 4338define <8 x i16> @trunc_or_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind { 4339; SSE-LABEL: trunc_or_v8i32_v8i16: 4340; SSE: # BB#0: 4341; SSE-NEXT: por %xmm2, %xmm0 4342; SSE-NEXT: por %xmm3, %xmm1 4343; SSE-NEXT: pslld $16, %xmm1 4344; SSE-NEXT: psrad $16, %xmm1 4345; SSE-NEXT: pslld $16, %xmm0 4346; SSE-NEXT: psrad $16, %xmm0 4347; SSE-NEXT: packssdw %xmm1, %xmm0 4348; SSE-NEXT: retq 4349; 4350; AVX1-LABEL: trunc_or_v8i32_v8i16: 4351; AVX1: # BB#0: 4352; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 4353; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 4354; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 4355; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 4356; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 4357; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4358; AVX1-NEXT: vzeroupper 4359; AVX1-NEXT: retq 4360; 4361; AVX2-LABEL: trunc_or_v8i32_v8i16: 4362; AVX2: # BB#0: 4363; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 4364; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero 4365; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 4366; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 4367; AVX2-NEXT: vzeroupper 4368; AVX2-NEXT: retq 4369; 4370; AVX512-LABEL: trunc_or_v8i32_v8i16: 4371; AVX512: # BB#0: 4372; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 4373; AVX512-NEXT: vpmovdw %zmm0, %ymm0 4374; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 4375; AVX512-NEXT: retq 4376 %1 = or <8 x i32> %a0, %a1 4377 %2 = trunc <8 x i32> %1 to <8 x i16> 4378 ret <8 x i16> %2 4379} 4380 4381define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> 
define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm5
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm6
; SSE-NEXT: por {{[0-9]+}}(%rsp), %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm4, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vorps %ymm6, %ymm2, %ymm2
; AVX1-NEXT: vorps %ymm7, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm5, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm7, %ymm3, %ymm3
; AVX2-NEXT: vpor %ymm6, %ymm2, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vporq %zmm3, %zmm1, %zmm1
; AVX512DQ-NEXT: vporq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i64> %a0, %a1
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

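; All three AVX512 variants above truncate v16i64 in two stages: each
; 8 x i64 half is narrowed with vpmovqd, the two <8 x i32> halves are
; concatenated back into a zmm, and a single vpmovdb produces the final
; <16 x i8>. AVX512DQ differs only in using vinserti32x8 for the
; concatenation where F/BW use vinserti64x4.
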
define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por %xmm4, %xmm0
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: por %xmm6, %xmm2
; SSE-NEXT: por %xmm7, %xmm3
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <16 x i32> %a0, %a1
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

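; With a legal <16 x i32> type the AVX512 lowering above is already
; minimal: one vpord plus one truncating vpmovdb.
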
define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind {
; SSE-LABEL: trunc_or_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i16> %a0, %a1
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; or to constant
;

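; These tests check that an or against a constant is narrowed past the
; truncate, so the constant can be folded as a {{.*}}(%rip) memory operand
; of the narrow op. The rewrite is lossless because or commutes with trunc
; and the truncated constants are still representable, e.g. (illustrative
; only):
;   %1 = or <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
;   %2 = trunc <4 x i64> %1 to <4 x i32>
; becomes
;   %t = trunc <4 x i64> %a0 to <4 x i32>
;   %r = or <4 x i32> %t, <i32 0, i32 1, i32 2, i32 3>
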
define <4 x i32> @trunc_or_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT: orps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX1-NEXT: vorps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v4i64_v4i32:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_or_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v8i64_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE-NEXT: orpd {{.*}}(%rip), %xmm2
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v8i64_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v8i64_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v8i64_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  %2 = trunc <8 x i64> %1 to <8 x i16>
  ret <8 x i16> %2
}

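; In both functions above every run line performs the or after the
; truncation, in the narrow type, with the constant vector loaded straight
; from the constant pool.
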
define <8 x i16> @trunc_or_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v8i32_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v8i32_v8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v8i32_v8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v8i32_v8i16:
; AVX512: # BB#0:
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i64_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm8 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT: pand %xmm8, %xmm7
; SSE-NEXT: pand %xmm8, %xmm6
; SSE-NEXT: packuswb %xmm7, %xmm6
; SSE-NEXT: pand %xmm8, %xmm5
; SSE-NEXT: pand %xmm8, %xmm4
; SSE-NEXT: packuswb %xmm5, %xmm4
; SSE-NEXT: packuswb %xmm6, %xmm4
; SSE-NEXT: pand %xmm8, %xmm3
; SSE-NEXT: pand %xmm8, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm8, %xmm1
; SSE-NEXT: pand %xmm8, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm4, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i64_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vandps %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vandps %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i64_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i64_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
  %2 = trunc <16 x i64> %1 to <16 x i8>
  ret <16 x i8> %2
}

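; Note that the or-with-constant disappears from the wide 64-bit domain
; entirely: even the eight-register SSE sequence above performs the whole
; truncation first and then a single 'por {{.*}}(%rip)' on the final
; <16 x i8> value.
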
define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i32_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT: pand %xmm4, %xmm3
; SSE-NEXT: pand %xmm4, %xmm2
; SSE-NEXT: packuswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm4, %xmm1
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: packuswb %xmm2, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i32_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i32_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: trunc_or_const_v16i32_v16i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = or <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  ret <16 x i8> %2
}

define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; SSE-LABEL: trunc_or_const_v16i16_v16i8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: por {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_or_const_v16i16_v16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_or_const_v16i16_v16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i16_v16i8:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = or <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  %2 = trunc <16 x i16> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; complex patterns - often created by vectorizer
;

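; Patterns of this shape typically start out as a scalar loop in which a
; mixed-width computation is widened by the vectorizer into the
; sext/mul/add/trunc chains below, e.g. (hypothetical scalar body, for
; illustration only):
;   %m = mul i64 %sext_a, %sext_b
;   %s = add i64 %m, %k
;   %t = trunc i64 %s to i32
; Because only the low 32 bits of each 64-bit lane survive the trunc, the
; backend can usually perform the arithmetic directly in 32 bits.
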
define <4 x i32> @mul_add_const_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_const_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
; SSE-NEXT: movdqa %xmm2, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm1, %xmm5
; SSE-NEXT: psrlq $32, %xmm5
; SSE-NEXT: pmuludq %xmm2, %xmm5
; SSE-NEXT: paddq %xmm4, %xmm5
; SSE-NEXT: psllq $32, %xmm5
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: paddq %xmm5, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlq $32, %xmm1
; SSE-NEXT: pmuludq %xmm3, %xmm1
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm0, %xmm4
; SSE-NEXT: paddq %xmm1, %xmm4
; SSE-NEXT: psllq $32, %xmm4
; SSE-NEXT: pmuludq %xmm3, %xmm0
; SSE-NEXT: paddq %xmm4, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; SSE-NEXT: paddd {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: mul_add_const_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mul_add_const_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mul_add_const_v4i64_v4i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mul_add_const_v4i64_v4i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mul_add_const_v4i64_v4i32:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = sext <4 x i32> %a1 to <4 x i64>
  %3 = mul <4 x i64> %1, %2
  %4 = add <4 x i64> %3, <i64 -3, i64 -1, i64 1, i64 3>
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}

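; For mul_add_const above, every AVX configuration shrinks the whole
; sext/mul/add/trunc chain to vpmulld + vpaddd with the truncated
; constant. SSE2 lacks a 32-bit pmulld (it arrived with SSE4.1), so it
; emulates the 64-bit multiplies with pmuludq/shift/add before truncating;
; AVX512DQ keeps a wide vpmullq but can zero-extend the inputs
; (vpmovzxdq) instead of sign-extending, since the discarded high bits do
; not matter.
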
define <4 x i32> @mul_add_self_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_self_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT: movdqa %xmm2, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movdqa %xmm1, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm1, %xmm5
; SSE-NEXT: psrlq $32, %xmm5
; SSE-NEXT: pmuludq %xmm0, %xmm5
; SSE-NEXT: paddq %xmm4, %xmm5
; SSE-NEXT: psllq $32, %xmm5
; SSE-NEXT: pmuludq %xmm0, %xmm1
; SSE-NEXT: paddq %xmm5, %xmm1
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: psrlq $32, %xmm0
; SSE-NEXT: pmuludq %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm2, %xmm4
; SSE-NEXT: paddq %xmm0, %xmm4
; SSE-NEXT: psllq $32, %xmm4
; SSE-NEXT: pmuludq %xmm2, %xmm3
; SSE-NEXT: paddq %xmm4, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
; SSE-NEXT: paddd %xmm1, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: mul_add_self_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mul_add_self_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mul_add_self_v4i64_v4i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mul_add_self_v4i64_v4i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mul_add_self_v4i64_v4i32:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512DQ-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = sext <4 x i32> %a1 to <4 x i64>
  %3 = mul <4 x i64> %1, %2
  %4 = add <4 x i64> %3, %3
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}

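; In mul_add_self above the add merely doubles the product, so the same
; 32-bit narrowing applies. In the multiuse case below %1 feeds both the
; mul and the add, and the AVX lowerings currently keep the arithmetic in
; 64-bit lanes (vpmuldq/vpaddq) before the final truncate.
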
define <4 x i32> @mul_add_multiuse_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_multiuse_v4i64_v4i32:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: movdqa %xmm3, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm4, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE-NEXT: movdqa %xmm3, %xmm2
; SSE-NEXT: psrlq $32, %xmm2
; SSE-NEXT: pmuludq %xmm4, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm5
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: psrlq $32, %xmm4
; SSE-NEXT: pmuludq %xmm3, %xmm4
; SSE-NEXT: paddq %xmm2, %xmm4
; SSE-NEXT: psllq $32, %xmm4
; SSE-NEXT: movdqa %xmm0, %xmm6
; SSE-NEXT: psrlq $32, %xmm6
; SSE-NEXT: pmuludq %xmm1, %xmm6
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: psrlq $32, %xmm1
; SSE-NEXT: pmuludq %xmm0, %xmm1
; SSE-NEXT: paddq %xmm6, %xmm1
; SSE-NEXT: psllq $32, %xmm1
; SSE-NEXT: paddq %xmm0, %xmm1
; SSE-NEXT: paddq %xmm1, %xmm2
; SSE-NEXT: paddq %xmm3, %xmm4
; SSE-NEXT: paddq %xmm5, %xmm4
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm4[0,2]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: mul_add_multiuse_v4i64_v4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpmuldq %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[0,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mul_add_multiuse_v4i64_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: mul_add_multiuse_v4i64_v4i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512F-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
; AVX512F-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: mul_add_multiuse_v4i64_v4i32:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512BW-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512BW-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
; AVX512BW-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mul_add_multiuse_v4i64_v4i32:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512DQ-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm1
; AVX512DQ-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = sext <4 x i32> %a1 to <4 x i64>
  %3 = mul <4 x i64> %1, %2
  %4 = add <4 x i64> %1, %3
  %5 = trunc <4 x i64> %4 to <4 x i32>
  ret <4 x i32> %5
}