; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 --check-prefix=X86-AVX --check-prefix=X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=X86-AVX --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 --check-prefix=X64-AVX --check-prefix=X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 --check-prefix=X64-AVX --check-prefix=X64-AVX2

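; Truncating an all-sign-bits ashr (by 63) should fold into sign-bit tests
; that feed packssdw directly: psrad+pshufd on SSE, vpcmpgtq against zero on
; AVX, rather than a real 64-bit arithmetic shift.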
define <4 x i32> @trunc_ashr_v4i64(<4 x i64> %a) nounwind {
; SSE-LABEL: trunc_ashr_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <4 x i64> %a, <i64 63, i64 63, i64 63, i64 63>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  ret <4 x i32> %2
}

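; A 64-bit ashr by 49 seen through an i32 bitcast: the shift can decompose
; into 32-bit psrad by 17 and by 31 on the upper dwords, and the blended
; result still feeds a single packssdw.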
define <8 x i16> @trunc_ashr_v4i64_bitcast(<4 x i64> %a0) {
; SSE-LABEL: trunc_ashr_v4i64_bitcast:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    psrad $17, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT:    psrad $17, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v4i64_bitcast:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vpsrad $17, %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v4i64_bitcast:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrad $17, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <4 x i64> %a0, <i64 49, i64 49, i64 49, i64 49>
  %2 = bitcast <4 x i64> %1 to <8 x i32>
  %3 = trunc <8 x i32> %2 to <8 x i16>
  ret <8 x i16> %3
}

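; ashr by 31 yields 0/-1 per element, which is within packssdw's signed
; saturation range, so the trunc becomes a single pack.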
define <8 x i16> @trunc_ashr_v8i32(<8 x i32> %a) nounwind {
; SSE-LABEL: trunc_ashr_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: trunc_ashr_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: trunc_ashr_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = ashr <8 x i32> %a, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

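; Both pack inputs are already sign-saturated: one is an ashr by 31, the
; other a sign-extended pcmpgtd, so the concat+trunc should fold to a single
; packssdw with no extra shuffles.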
define <8 x i16> @trunc_ashr_v4i32_icmp_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; X86-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    psrad $31, %xmm0
; X86-SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; X86-AVX-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    psrad $31, %xmm0
; X64-SSE-NEXT:    pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: trunc_ashr_v4i32_icmp_v4i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; X64-AVX-NEXT:    vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %1 = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
  %2 = icmp sgt <4 x i32> %b, <i32 1, i32 16, i32 255, i32 65535>
  %3 = sext <4 x i1> %2 to <4 x i32>
  %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}

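; Only elements 0 and 4 survive the shuffle, exercising demanded-elements
; simplification through the non-uniform (63/0) shl+ashr pair.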
define <8 x i16> @trunc_ashr_v4i64_demandedelts(<4 x i64> %a0) {
; X86-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    psllq $63, %xmm1
; X86-SSE-NEXT:    psllq $63, %xmm0
; X86-SSE-NEXT:    psrlq $63, %xmm0
; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,0,0,2147483648]
; X86-SSE-NEXT:    pxor %xmm2, %xmm0
; X86-SSE-NEXT:    psubq %xmm2, %xmm0
; X86-SSE-NEXT:    psrlq $63, %xmm1
; X86-SSE-NEXT:    pxor %xmm2, %xmm1
; X86-SSE-NEXT:    psubq %xmm2, %xmm1
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [1,1]
; X86-AVX1-NEXT:    # xmm2 = mem[0,0]
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X86-AVX1-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [63,0,0,0,63,0,0,0]
; X86-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [1,0,0,2147483648,1,0,0,2147483648]
; X86-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    psllq $63, %xmm1
; X64-SSE-NEXT:    psllq $63, %xmm0
; X64-SSE-NEXT:    psrlq $63, %xmm0
; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,9223372036854775808]
; X64-SSE-NEXT:    pxor %xmm2, %xmm0
; X64-SSE-NEXT:    psubq %xmm2, %xmm0
; X64-SSE-NEXT:    psrlq $63, %xmm1
; X64-SSE-NEXT:    pxor %xmm2, %xmm1
; X64-SSE-NEXT:    psubq %xmm2, %xmm1
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1]
; X64-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = [1,9223372036854775808,1,9223372036854775808]
; X64-AVX2-NEXT:    # ymm1 = mem[0,1,0,1]
; X64-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %1 = shl <4 x i64> %a0, <i64 63, i64 0, i64 63, i64 0>
  %2 = ashr <4 x i64> %1, <i64 63, i64 0, i64 63, i64 0>
  %3 = bitcast <4 x i64> %2 to <8 x i32>
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %5 = trunc <8 x i32> %4 to <8 x i16>
  ret <8 x i16> %5
}

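; Concatenating a sign-extended pcmpeqw result with zero should pack in one
; packsswb against the zero register.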
define <16 x i8> @packsswb_icmp_zero_128(<8 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_128:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: packsswb_icmp_zero_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = sext <8 x i1> %1 to <8 x i8>
  %3 = shufflevector <8 x i8> %2, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %3
}

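; Same pattern as above, but reached via a <16 x i16> concat and trunc; it
; should lower to the identical packsswb sequence.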
define <16 x i8> @packsswb_icmp_zero_trunc_128(<8 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_trunc_128:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX-LABEL: packsswb_icmp_zero_trunc_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <8 x i16> %a0, zeroinitializer
  %2 = sext <8 x i1> %1 to <8 x i16>
  %3 = shufflevector <8 x i16> %2, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

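; 256-bit version: the byte shuffle interleaves zero and compare bytes in
; packsswb's per-lane layout, so it should map onto packsswb without a lane
; fix-up.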
define <32 x i8> @packsswb_icmp_zero_256(<16 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: packsswb_icmp_zero_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: packsswb_icmp_zero_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = bitcast <16 x i16> %2 to <32 x i8>
  %4 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %4
}

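; The trunc form of the 256-bit pattern: the word shuffle crosses 128-bit
; lanes, so the AVX2 lowering ends up with a trailing vpermq lane fix-up
; after the in-lane packsswb.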
define <32 x i8> @packsswb_icmp_zero_trunc_256(<16 x i16> %a0) {
; SSE-LABEL: packsswb_icmp_zero_trunc_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm3
; SSE-NEXT:    packsswb %xmm0, %xmm3
; SSE-NEXT:    packsswb %xmm1, %xmm2
; SSE-NEXT:    movdqa %xmm3, %xmm0
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    ret{{[l|q]}}
;
; AVX1-LABEL: packsswb_icmp_zero_trunc_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    ret{{[l|q]}}
;
; AVX2-LABEL: packsswb_icmp_zero_trunc_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
; AVX2-NEXT:    ret{{[l|q]}}
  %1 = icmp eq <16 x i16> %a0, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = shufflevector <16 x i16> zeroinitializer, <16 x i16> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = trunc <32 x i16> %3 to <32 x i8>
  ret <32 x i8> %4
}
