; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
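;
; Each test below compares a vector against zero (icmp slt), bitcasts the
; resulting boolean vector to a two-element integer vector, extracts both
; elements and adds them, checking the mask-extraction codegen per subtarget.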

;
; 128-bit vectors
;

define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $2, %cl
; SSE2-SSSE3-NEXT:    andb $3, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i32_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrb $4, %cl
; AVX12-NEXT:    andb $15, %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v16i8_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    shrl $8, %eax
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $2, %cl
; SSE2-SSSE3-NEXT:    andb $3, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i64_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v8i32_to_v2i4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $4, %cl
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
; AVX512-NEXT:    movl %ecx, %eax
; AVX512-NEXT:    shrl $16, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrb $4, %cl
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $4, %cl
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %zmm0, %k0
; AVX512-NEXT:    kmovd %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %edx
; SSE2-SSSE3-NEXT:    shll $16, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    shlq $32, %rdx
; SSE2-SSSE3-NEXT:    orq %rcx, %rdx
; SSE2-SSSE3-NEXT:    movq %rdx, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v64i8_to_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v64i8_to_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm1, %ecx
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    addl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <64 x i8> %a0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = extractelement <2 x i32> %2, i32 1
  %5 = add i32 %3, %4
  ret i32 %5
}

define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v128i8_to_v2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm7, %edx
; SSE2-SSSE3-NEXT:    shll $16, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    shlq $32, %rdx
; SSE2-SSSE3-NEXT:    orq %rcx, %rdx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %esi
; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %esi, %eax
; SSE2-SSSE3-NEXT:    shlq $32, %rax
; SSE2-SSSE3-NEXT:    orq %rcx, %rax
; SSE2-SSSE3-NEXT:    addq %rdx, %rax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v128i8_to_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    vpmovmskb %xmm3, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    shlq $32, %rcx
; AVX1-NEXT:    orq %rdx, %rcx
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    vpmovmskb %xmm1, %esi
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %esi, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rdx, %rax
; AVX1-NEXT:    addq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v128i8_to_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm3, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    vpmovmskb %ymm2, %ecx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vpmovmskb %ymm1, %edx
; AVX2-NEXT:    shlq $32, %rdx
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    orq %rdx, %rax
; AVX2-NEXT:    addq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v128i8_to_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm1, %k0
; AVX512-NEXT:    kmovq %k0, %rcx
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, %rax
; AVX512-NEXT:    addq %rcx, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <128 x i8> %a0, zeroinitializer
  %2 = bitcast <128 x i1> %1 to <2 x i64>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = extractelement <2 x i64> %2, i32 1
  %5 = add i64 %3, %4
  ret i64 %5
}