; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=X64-AVX --check-prefix=X64-AVX2

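; The trunc_ashr tests below check that a truncation of an ashr result can be
; lowered to a sign-saturating pack (packssdw) when the shift leaves every
; lane within the signed range of the narrower element type.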
define <4 x i32> @trunc_ashr_v4i64(<4 x i64> %a) nounwind {
; SSE-LABEL: trunc_ashr_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <4 x i64> %a, <i64 63, i64 63, i64 63, i64 63>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @trunc_ashr_v4i64_bitcast(<4 x i64> %a0) {
; SSE-LABEL: trunc_ashr_v4i64_bitcast:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    psrad $17, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    psrad $17, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v4i64_bitcast:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $17, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v4i64_bitcast:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrad $17, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <4 x i64> %a0, <i64 49, i64 49, i64 49, i64 49>
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = trunc <8 x i32> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <8 x i16> @trunc_ashr_v8i32(<8 x i32> %a) nounwind {
; SSE-LABEL: trunc_ashr_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <8 x i32> %a, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

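; Here both pack operands are sign-extension masks: one from an ashr by 31,
; the other from the sext of an icmp, so a single packssdw can combine them.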
define <8 x i16> @trunc_ashr_v4i32_icmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; X86-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    psrad $31, %xmm0
; X86-SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; X86-AVX-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    psrad $31, %xmm0
; X64-SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; X64-AVX-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
  %2 = icmp sgt <4 x i32> %b, <i32 1, i32 16, i32 255, i32 65535>
  %3 = sext <4 x i1> %2 to <4 x i32>
  %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}

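; The shuffle only demands the low 32-bit lane of each 128-bit half, so only
; the i64 elements that are actually shifted (0 and 2) need to be computed.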
define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    psllq $63, %xmm1
; X86-SSE-NEXT:    psllq $63, %xmm0
; X86-SSE-NEXT:    psrlq $63, %xmm0
; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,0,0,2147483648]
; X86-SSE-NEXT:    pxor %xmm2, %xmm0
; X86-SSE-NEXT:    psubq %xmm2, %xmm0
; X86-SSE-NEXT:    psrlq $63, %xmm1
; X86-SSE-NEXT:    pxor %xmm2, %xmm1
; X86-SSE-NEXT:    psubq %xmm2, %xmm1
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
; X86-AVX1-NEXT:    # xmm2 = mem[0,0]
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [1,0,0,2147483648,1,0,0,2147483648]
; X86-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    psllq $63, %xmm1
; X64-SSE-NEXT:    psllq $63, %xmm0
; X64-SSE-NEXT:    psrlq $63, %xmm0
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,9223372036854775808]
; X64-SSE-NEXT:    pxor %xmm2, %xmm0
; X64-SSE-NEXT:    psubq %xmm2, %xmm0
; X64-SSE-NEXT:    psrlq $63, %xmm1
; X64-SSE-NEXT:    pxor %xmm2, %xmm1
; X64-SSE-NEXT:    psubq %xmm2, %xmm1
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; X64-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [1,9223372036854775808,1,9223372036854775808]
; X64-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>
  %2 = ashr <4 x i64> %1, <i64 63, i64 0, i64 63, i64 0>
  %3 = bitcast <4 x i64> %2 to <8 x i32>
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}

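; The remaining packsswb tests check that comparison masks (all-ones or zero
; per lane) can be packed together with a zero vector via packsswb, with and
; without an explicit trunc, for both 128-bit and 256-bit vectors.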
define <16 x i8> @packsswb_icmp_zero_128(<8 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_128:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: packsswb_icmp_zero_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = sext <8 x i1> %1 to <8 x i8>
  %3 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %3
}

define <16 x i8> @packsswb_icmp_zero_trunc_128(<8 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_trunc_128:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: packsswb_icmp_zero_trunc_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = sext <8 x i1> %1 to <8 x i16>
  %3 = shufflevector <8 x i16> %2, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: packsswb_icmp_zero_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: packsswb_icmp_zero_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = bitcast <16 x i16> %2 to <32 x i8>
  %4 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %4
}

define <32 x i8> @packsswb_icmp_zero_trunc_256(<16 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_trunc_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: packsswb_icmp_zero_trunc_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: packsswb_icmp_zero_trunc_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = shufflevector <16 x i16> zeroinitializer, <16 x i16> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = trunc <32 x i16> %3 to <32 x i8>
  ret <32 x i8> %4
}