; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

10define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
11; SSE2-LABEL: zext_16i8_to_8i16:
12; SSE2:       # BB#0: # %entry
13; SSE2-NEXT:    pxor %xmm1, %xmm1
14; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
15; SSE2-NEXT:    retq
16;
17; SSSE3-LABEL: zext_16i8_to_8i16:
18; SSSE3:       # BB#0: # %entry
19; SSSE3-NEXT:    pxor %xmm1, %xmm1
20; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
21; SSSE3-NEXT:    retq
22;
23; SSE41-LABEL: zext_16i8_to_8i16:
24; SSE41:       # BB#0: # %entry
25; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
26; SSE41-NEXT:    retq
27;
28; AVX-LABEL: zext_16i8_to_8i16:
29; AVX:       # BB#0: # %entry
30; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
31; AVX-NEXT:    retq
32entry:
33  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
34  %C = zext <8 x i8> %B to <8 x i16>
35  ret <8 x i16> %C
36}
37
38; PR17654
39define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
40; SSE2-LABEL: zext_16i8_to_16i16:
41; SSE2:       # BB#0: # %entry
42; SSE2-NEXT:    movdqa %xmm0, %xmm1
43; SSE2-NEXT:    pxor %xmm2, %xmm2
44; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
45; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
46; SSE2-NEXT:    retq
47;
48; SSSE3-LABEL: zext_16i8_to_16i16:
49; SSSE3:       # BB#0: # %entry
50; SSSE3-NEXT:    movdqa %xmm0, %xmm1
51; SSSE3-NEXT:    pxor %xmm2, %xmm2
52; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
53; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
54; SSSE3-NEXT:    retq
55;
56; SSE41-LABEL: zext_16i8_to_16i16:
57; SSE41:       # BB#0: # %entry
58; SSE41-NEXT:    movdqa %xmm0, %xmm1
59; SSE41-NEXT:    pxor %xmm2, %xmm2
60; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
61; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
62; SSE41-NEXT:    retq
63;
64; AVX1-LABEL: zext_16i8_to_16i16:
65; AVX1:       # BB#0: # %entry
66; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
67; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
68; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
69; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
70; AVX1-NEXT:    retq
71;
72; AVX2-LABEL: zext_16i8_to_16i16:
73; AVX2:       # BB#0: # %entry
74; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
75; AVX2-NEXT:    retq
76;
77; AVX512-LABEL: zext_16i8_to_16i16:
78; AVX512:       # BB#0: # %entry
79; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
80; AVX512-NEXT:    retq
81entry:
82  %B = zext <16 x i8> %A to <16 x i16>
83  ret <16 x i16> %B
84}
85
86define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) {
87; SSE2-LABEL: zext_32i8_to_32i16:
88; SSE2:       # BB#0: # %entry
89; SSE2-NEXT:    movdqa %xmm1, %xmm3
90; SSE2-NEXT:    movdqa %xmm0, %xmm1
91; SSE2-NEXT:    pxor %xmm4, %xmm4
92; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
93; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
94; SSE2-NEXT:    movdqa %xmm3, %xmm2
95; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
96; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
97; SSE2-NEXT:    retq
98;
99; SSSE3-LABEL: zext_32i8_to_32i16:
100; SSSE3:       # BB#0: # %entry
101; SSSE3-NEXT:    movdqa %xmm1, %xmm3
102; SSSE3-NEXT:    movdqa %xmm0, %xmm1
103; SSSE3-NEXT:    pxor %xmm4, %xmm4
104; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
105; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
106; SSSE3-NEXT:    movdqa %xmm3, %xmm2
107; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
108; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
109; SSSE3-NEXT:    retq
110;
111; SSE41-LABEL: zext_32i8_to_32i16:
112; SSE41:       # BB#0: # %entry
113; SSE41-NEXT:    movdqa %xmm1, %xmm3
114; SSE41-NEXT:    movdqa %xmm0, %xmm1
115; SSE41-NEXT:    pxor %xmm4, %xmm4
116; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
117; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15]
118; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
119; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15]
120; SSE41-NEXT:    retq
121;
122; AVX1-LABEL: zext_32i8_to_32i16:
123; AVX1:       # BB#0: # %entry
124; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
125; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
126; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
127; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
128; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
129; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
130; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
131; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
132; AVX1-NEXT:    vmovaps %ymm2, %ymm0
133; AVX1-NEXT:    retq
134;
135; AVX2-LABEL: zext_32i8_to_32i16:
136; AVX2:       # BB#0: # %entry
137; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
138; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
139; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
140; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
141; AVX2-NEXT:    retq
142;
143; AVX512F-LABEL: zext_32i8_to_32i16:
144; AVX512F:       # BB#0: # %entry
145; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
146; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
147; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
148; AVX512F-NEXT:    vmovdqa %ymm2, %ymm0
149; AVX512F-NEXT:    retq
150;
151; AVX512BW-LABEL: zext_32i8_to_32i16:
152; AVX512BW:       # BB#0: # %entry
153; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
154; AVX512BW-NEXT:    retq
155entry:
156  %B = zext <32 x i8> %A to <32 x i16>
157  ret <32 x i16> %B
158}
159
160define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
161; SSE2-LABEL: zext_16i8_to_4i32:
162; SSE2:       # BB#0: # %entry
163; SSE2-NEXT:    pxor %xmm1, %xmm1
164; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
165; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
166; SSE2-NEXT:    retq
167;
168; SSSE3-LABEL: zext_16i8_to_4i32:
169; SSSE3:       # BB#0: # %entry
170; SSSE3-NEXT:    pxor %xmm1, %xmm1
171; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
172; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
173; SSSE3-NEXT:    retq
174;
175; SSE41-LABEL: zext_16i8_to_4i32:
176; SSE41:       # BB#0: # %entry
177; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
178; SSE41-NEXT:    retq
179;
180; AVX-LABEL: zext_16i8_to_4i32:
181; AVX:       # BB#0: # %entry
182; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
183; AVX-NEXT:    retq
184entry:
185  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
186  %C = zext <4 x i8> %B to <4 x i32>
187  ret <4 x i32> %C
188}
189
190define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
191; SSE2-LABEL: zext_16i8_to_8i32:
192; SSE2:       # BB#0: # %entry
193; SSE2-NEXT:    movdqa %xmm0, %xmm1
194; SSE2-NEXT:    pxor %xmm2, %xmm2
195; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
196; SSE2-NEXT:    movdqa %xmm1, %xmm0
197; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
198; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
199; SSE2-NEXT:    retq
200;
201; SSSE3-LABEL: zext_16i8_to_8i32:
202; SSSE3:       # BB#0: # %entry
203; SSSE3-NEXT:    movdqa %xmm0, %xmm1
204; SSSE3-NEXT:    pxor %xmm2, %xmm2
205; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
206; SSSE3-NEXT:    movdqa %xmm1, %xmm0
207; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
208; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
209; SSSE3-NEXT:    retq
210;
211; SSE41-LABEL: zext_16i8_to_8i32:
212; SSE41:       # BB#0: # %entry
213; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
214; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
215; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
216; SSE41-NEXT:    movdqa %xmm2, %xmm0
217; SSE41-NEXT:    retq
218;
219; AVX1-LABEL: zext_16i8_to_8i32:
220; AVX1:       # BB#0: # %entry
221; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
222; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
223; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
224; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
225; AVX1-NEXT:    retq
226;
227; AVX2-LABEL: zext_16i8_to_8i32:
228; AVX2:       # BB#0: # %entry
229; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
230; AVX2-NEXT:    retq
231;
232; AVX512-LABEL: zext_16i8_to_8i32:
233; AVX512:       # BB#0: # %entry
234; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
235; AVX512-NEXT:    retq
236entry:
237  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
238  %C = zext <8 x i8> %B to <8 x i32>
239  ret <8 x i32> %C
240}
241
242define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
243; SSE2-LABEL: zext_16i8_to_16i32:
244; SSE2:       # BB#0: # %entry
245; SSE2-NEXT:    movdqa %xmm0, %xmm1
246; SSE2-NEXT:    pxor %xmm4, %xmm4
247; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
248; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
249; SSE2-NEXT:    movdqa %xmm1, %xmm0
250; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
251; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
252; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
253; SSE2-NEXT:    movdqa %xmm3, %xmm2
254; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
255; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
256; SSE2-NEXT:    retq
257;
258; SSSE3-LABEL: zext_16i8_to_16i32:
259; SSSE3:       # BB#0: # %entry
260; SSSE3-NEXT:    movdqa %xmm0, %xmm1
261; SSSE3-NEXT:    pxor %xmm4, %xmm4
262; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
263; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
264; SSSE3-NEXT:    movdqa %xmm1, %xmm0
265; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
266; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
267; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
268; SSSE3-NEXT:    movdqa %xmm3, %xmm2
269; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
270; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
271; SSSE3-NEXT:    retq
272;
273; SSE41-LABEL: zext_16i8_to_16i32:
274; SSE41:       # BB#0: # %entry
275; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
276; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
277; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
278; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
279; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
280; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
281; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
282; SSE41-NEXT:    movdqa %xmm4, %xmm0
283; SSE41-NEXT:    retq
284;
285; AVX1-LABEL: zext_16i8_to_16i32:
286; AVX1:       # BB#0: # %entry
287; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
288; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
289; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
290; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
291; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
292; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
293; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
294; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
295; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
296; AVX1-NEXT:    vmovaps %ymm2, %ymm0
297; AVX1-NEXT:    retq
298;
299; AVX2-LABEL: zext_16i8_to_16i32:
300; AVX2:       # BB#0: # %entry
301; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
302; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
303; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
304; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
305; AVX2-NEXT:    retq
306;
307; AVX512-LABEL: zext_16i8_to_16i32:
308; AVX512:       # BB#0: # %entry
309; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
310; AVX512-NEXT:    retq
311entry:
312  %B = zext <16 x i8> %A to <16 x i32>
313  ret <16 x i32> %B
314}
315
316define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
317; SSE2-LABEL: zext_16i8_to_2i64:
318; SSE2:       # BB#0: # %entry
319; SSE2-NEXT:    pxor %xmm1, %xmm1
320; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
321; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
322; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
323; SSE2-NEXT:    retq
324;
325; SSSE3-LABEL: zext_16i8_to_2i64:
326; SSSE3:       # BB#0: # %entry
327; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
328; SSSE3-NEXT:    retq
329;
330; SSE41-LABEL: zext_16i8_to_2i64:
331; SSE41:       # BB#0: # %entry
332; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
333; SSE41-NEXT:    retq
334;
335; AVX-LABEL: zext_16i8_to_2i64:
336; AVX:       # BB#0: # %entry
337; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
338; AVX-NEXT:    retq
339entry:
340  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
341  %C = zext <2 x i8> %B to <2 x i64>
342  ret <2 x i64> %C
343}
344
345define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
346; SSE2-LABEL: zext_16i8_to_4i64:
347; SSE2:       # BB#0: # %entry
348; SSE2-NEXT:    movdqa %xmm0, %xmm1
349; SSE2-NEXT:    pxor %xmm2, %xmm2
350; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
351; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
352; SSE2-NEXT:    movdqa %xmm1, %xmm0
353; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
354; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
355; SSE2-NEXT:    retq
356;
357; SSSE3-LABEL: zext_16i8_to_4i64:
358; SSSE3:       # BB#0: # %entry
359; SSSE3-NEXT:    movdqa %xmm0, %xmm1
360; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
361; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
362; SSSE3-NEXT:    retq
363;
364; SSE41-LABEL: zext_16i8_to_4i64:
365; SSE41:       # BB#0: # %entry
366; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
367; SSE41-NEXT:    psrld $16, %xmm0
368; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
369; SSE41-NEXT:    movdqa %xmm2, %xmm0
370; SSE41-NEXT:    retq
371;
372; AVX1-LABEL: zext_16i8_to_4i64:
373; AVX1:       # BB#0: # %entry
374; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
375; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
376; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
377; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
378; AVX1-NEXT:    retq
379;
380; AVX2-LABEL: zext_16i8_to_4i64:
381; AVX2:       # BB#0: # %entry
382; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
383; AVX2-NEXT:    retq
384;
385; AVX512-LABEL: zext_16i8_to_4i64:
386; AVX512:       # BB#0: # %entry
387; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
388; AVX512-NEXT:    retq
389entry:
390  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
391  %C = zext <4 x i8> %B to <4 x i64>
392  ret <4 x i64> %C
393}
394
395define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
396; SSE2-LABEL: zext_16i8_to_8i64:
397; SSE2:       # BB#0: # %entry
398; SSE2-NEXT:    movdqa %xmm0, %xmm1
399; SSE2-NEXT:    pxor %xmm4, %xmm4
400; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
401; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
402; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
403; SSE2-NEXT:    movdqa %xmm1, %xmm0
404; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
405; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
406; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
407; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
408; SSE2-NEXT:    movdqa %xmm3, %xmm2
409; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
410; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
411; SSE2-NEXT:    retq
412;
413; SSSE3-LABEL: zext_16i8_to_8i64:
414; SSSE3:       # BB#0: # %entry
415; SSSE3-NEXT:    movdqa %xmm0, %xmm1
416; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
417; SSSE3-NEXT:    pshufb %xmm4, %xmm0
418; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
419; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
420; SSSE3-NEXT:    pshufb %xmm5, %xmm1
421; SSSE3-NEXT:    movdqa %xmm3, %xmm2
422; SSSE3-NEXT:    pshufb %xmm4, %xmm2
423; SSSE3-NEXT:    pshufb %xmm5, %xmm3
424; SSSE3-NEXT:    retq
425;
426; SSE41-LABEL: zext_16i8_to_8i64:
427; SSE41:       # BB#0: # %entry
428; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
429; SSE41-NEXT:    movdqa %xmm0, %xmm1
430; SSE41-NEXT:    psrld $16, %xmm1
431; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
432; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
433; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
434; SSE41-NEXT:    psrlq $48, %xmm0
435; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
436; SSE41-NEXT:    movdqa %xmm4, %xmm0
437; SSE41-NEXT:    retq
438;
439; AVX1-LABEL: zext_16i8_to_8i64:
440; AVX1:       # BB#0: # %entry
441; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
442; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm2
443; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
444; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
445; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
446; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
447; AVX1-NEXT:    vpsrlq $48, %xmm0, %xmm0
448; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
449; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
450; AVX1-NEXT:    vmovaps %ymm2, %ymm0
451; AVX1-NEXT:    retq
452;
453; AVX2-LABEL: zext_16i8_to_8i64:
454; AVX2:       # BB#0: # %entry
455; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
456; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
457; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
458; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
459; AVX2-NEXT:    retq
460;
461; AVX512F-LABEL: zext_16i8_to_8i64:
462; AVX512F:       # BB#0: # %entry
463; AVX512F-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
464; AVX512F-NEXT:    retq
465;
466; AVX512BW-LABEL: zext_16i8_to_8i64:
467; AVX512BW:       # BB#0: # %entry
468; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
469; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
470; AVX512BW-NEXT:    retq
471entry:
472  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
473  %C = zext <8 x i8> %B to <8 x i64>
474  ret <8 x i64> %C
475}
476
; Zero-extend the low four i16 lanes of %A to i32. SSE2/SSSE3 interleave with a
; zeroed register via punpcklwd; SSE4.1 and AVX use a single pmovzxwd.
define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}
504
; Zero-extend all eight i16 lanes of %A to i32 (result spans two 128-bit
; registers pre-AVX2; AVX2/AVX512 produce one 256-bit vpmovzxwd).
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}
551
; Zero-extend sixteen i16 lanes to i32 (<16 x i16> -> <16 x i32>). Pre-AVX the
; result spans four xmm registers; AVX512 emits a single 512-bit vpmovzxwd.
define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_16i16_to_16i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_16i16_to_16i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_16i16_to_16i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_16i16_to_16i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_16i16_to_16i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_16i16_to_16i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <16 x i16> %A to <16 x i32>
  ret <16 x i32> %B
}
617
; Zero-extend the low two i16 lanes of %A to i64. SSE2/SSSE3 need two chained
; interleaves with zero (word then dword); SSE4.1 and AVX use one pmovzxwq.
define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_8i16_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}
647
; Zero-extend the low four i16 lanes of %A to i64 (result is 256 bits: two xmm
; registers pre-AVX, one ymm with AVX2/AVX512 via vpmovzxwq).
define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = zext <4 x i16> %B to <4 x i64>
  ret <4 x i64> %C
}
699
; Zero-extend all eight i16 lanes of %A to i64 (512-bit result: four xmm
; registers pre-AVX, two ymm with AVX2, one zmm with AVX512 vpmovzxwq).
define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i16_to_8i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i16_to_8i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i16_to_8i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i16_to_8i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i16_to_8i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i16> %A to <8 x i64>
  ret <8 x i64> %B
}
773
; Zero-extend the low two i32 lanes of %A to i64. Pre-SSE4.1 this is a single
; punpckldq with a zeroed register; SSE4.1 and AVX use pmovzxdq.
define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: zext_4i32_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT:    retq
entry:
  %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %C = zext <2 x i32> %B to <2 x i64>
  ret <2 x i64> %C
}
801
; Zero-extend all four i32 lanes of %A to i64 (<4 x i32> -> <4 x i64>). AVX2
; and AVX512 fold the whole extension into one 256-bit vpmovzxdq.
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_4i32_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}
848
; Zero-extend eight i32 lanes to i64 (<8 x i32> -> <8 x i64>, 512-bit result).
; AVX512 lowers the whole extension to a single vpmovzxdq from ymm to zmm.
define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i32_to_8i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_8i32_to_8i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm1, %xmm3
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm4, %xmm4
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_8i32_to_8i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero
; SSE41-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_8i32_to_8i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vmovaps %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_8i32_to_8i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    vmovdqa %ymm2, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_8i32_to_8i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT:    retq
entry:
  %B = zext <8 x i32> %A to <8 x i64>
  ret <8 x i64>%B
}
914
; Load a <2 x i8> from memory and zero-extend it to <2 x i64>. SSE2 loads two
; bytes via movzwl+movd then unpacks; SSE4.1/AVX fold the load into pmovzxbq.
define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movzwl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_2i8_to_2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <2 x i8>, <2 x i8>* %ptr
 %Y = zext <2 x i8> %X to <2 x i64>
 ret <2 x i64> %Y
}
947
; Load a <4 x i8> and zero-extend it to <4 x i32>. SSE4.1/AVX fold the memory
; operand directly into pmovzxbd; earlier targets load with movd then unpack.
define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_4i8_to_4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    retq
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = zext <4 x i8> %X to <4 x i32>
 ret <4 x i32> %Y
}
979
; Load a <4 x i8> and zero-extend it to <4 x i64> (256-bit result). AVX2 and
; AVX512 fold load plus extension into one 256-bit vpmovzxbq.
define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_zext_4i8_to_4i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_4i8_to_4i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_4i8_to_4i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_4i8_to_4i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <4 x i8>, <4 x i8>* %ptr
 %Y = zext <4 x i8> %X to <4 x i64>
 ret <4 x i64> %Y
}
1027
; Load a <8 x i8> and zero-extend it to <8 x i16>. SSE2/SSSE3 use a 64-bit
; movq load plus punpcklbw with zero; SSE4.1/AVX fold the load into pmovzxbw.
define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_zext_8i8_to_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i16>
 ret <8 x i16> %Y
}
1057
; Load a <8 x i8> and zero-extend it to <8 x i32> (256-bit result). AVX2 and
; AVX512 fold load plus extension into one 256-bit vpmovzxbd.
define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
; SSE2-LABEL: load_zext_8i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_8i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_8i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_8i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <8 x i8>, <8 x i8>* %ptr
 %Y = zext <8 x i8> %X to <8 x i32>
 ret <8 x i32> %Y
}
1106
; Load a full <16 x i8>, take the low eight bytes (shufflevector), and
; zero-extend them to <8 x i32>. AVX2/AVX512 narrow this to a single folded
; vpmovzxbd from memory.
define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_8i32:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa (%rdi), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_zext_16i8_to_8i32:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa (%rdi), %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_zext_16i8_to_8i32:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa (%rdi), %xmm1
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_zext_16i8_to_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_zext_16i8_to_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_zext_16i8_to_8i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT:    retq
entry:
 %X = load <16 x i8>, <16 x i8>* %ptr
 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 %Z = zext <8 x i8> %Y to <8 x i32>
 ret <8 x i32> %Z
}
1160
; Zext load <8 x i8> -> <8 x i64> (4 xmm results on 128-bit targets). SSE2
; chains punpcklbw/punpcklwd/punpckldq/punpckhdq against zero; SSSE3 uses two
; pshufb masks (0x80 lanes produce zero); SSE4.1/AVX1 use pmovzxbq per pair of
; elements; AVX2 two ymm vpmovzxbq; AVX512 a single zmm vpmovzxbq.
1161define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) {
1162; SSE2-LABEL: load_zext_8i8_to_8i64:
1163; SSE2:       # BB#0: # %entry
1164; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1165; SSE2-NEXT:    pxor %xmm4, %xmm4
1166; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1167; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
1168; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1169; SSE2-NEXT:    movdqa %xmm1, %xmm0
1170; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
1171; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1172; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
1173; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1174; SSE2-NEXT:    movdqa %xmm3, %xmm2
1175; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1176; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
1177; SSE2-NEXT:    retq
1178;
1179; SSSE3-LABEL: load_zext_8i8_to_8i64:
1180; SSSE3:       # BB#0: # %entry
1181; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1182; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128]
1183; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1184; SSSE3-NEXT:    pshufb %xmm4, %xmm0
1185; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128]
1186; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
1187; SSSE3-NEXT:    pshufb %xmm5, %xmm1
1188; SSSE3-NEXT:    movdqa %xmm3, %xmm2
1189; SSSE3-NEXT:    pshufb %xmm4, %xmm2
1190; SSSE3-NEXT:    pshufb %xmm5, %xmm3
1191; SSSE3-NEXT:    retq
1192;
1193; SSE41-LABEL: load_zext_8i8_to_8i64:
1194; SSE41:       # BB#0: # %entry
1195; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1196; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1197; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1198; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1199; SSE41-NEXT:    retq
1200;
1201; AVX1-LABEL: load_zext_8i8_to_8i64:
1202; AVX1:       # BB#0: # %entry
1203; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1204; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1205; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1206; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1207; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
1208; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1209; AVX1-NEXT:    retq
1210;
1211; AVX2-LABEL: load_zext_8i8_to_8i64:
1212; AVX2:       # BB#0: # %entry
1213; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1214; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
1215; AVX2-NEXT:    retq
1216;
1217; AVX512-LABEL: load_zext_8i8_to_8i64:
1218; AVX512:       # BB#0: # %entry
1219; AVX512-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
1220; AVX512-NEXT:    retq
1221entry:
1222 %X = load <8 x i8>, <8 x i8>* %ptr
1223 %Y = zext <8 x i8> %X to <8 x i64>
1224 ret <8 x i64> %Y
1225}
1226
; Zext load <16 x i8> -> <16 x i16>. SSE2/SSSE3: punpcklbw/punpckhbw with a
; zero register; SSE4.1/AVX1: two pmovzxbw (AVX1 merges via vinsertf128);
; AVX2/AVX512: a single ymm vpmovzxbw folding the load.
1227define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
1228; SSE2-LABEL: load_zext_16i8_to_16i16:
1229; SSE2:       # BB#0: # %entry
1230; SSE2-NEXT:    movdqa (%rdi), %xmm1
1231; SSE2-NEXT:    pxor %xmm2, %xmm2
1232; SSE2-NEXT:    movdqa %xmm1, %xmm0
1233; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1234; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1235; SSE2-NEXT:    retq
1236;
1237; SSSE3-LABEL: load_zext_16i8_to_16i16:
1238; SSSE3:       # BB#0: # %entry
1239; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1240; SSSE3-NEXT:    pxor %xmm2, %xmm2
1241; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1242; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1243; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
1244; SSSE3-NEXT:    retq
1245;
1246; SSE41-LABEL: load_zext_16i8_to_16i16:
1247; SSE41:       # BB#0: # %entry
1248; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1249; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1250; SSE41-NEXT:    retq
1251;
1252; AVX1-LABEL: load_zext_16i8_to_16i16:
1253; AVX1:       # BB#0: # %entry
1254; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1255; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1256; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1257; AVX1-NEXT:    retq
1258;
1259; AVX2-LABEL: load_zext_16i8_to_16i16:
1260; AVX2:       # BB#0: # %entry
1261; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1262; AVX2-NEXT:    retq
1263;
1264; AVX512-LABEL: load_zext_16i8_to_16i16:
1265; AVX512:       # BB#0: # %entry
1266; AVX512-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
1267; AVX512-NEXT:    retq
1268entry:
1269 %X = load <16 x i8>, <16 x i8>* %ptr
1270 %Y = zext <16 x i8> %X to <16 x i16>
1271 ret <16 x i16> %Y
1272}
1273
; Zext load <2 x i16> -> <2 x i64>. SSE2/SSSE3: movd the 4 bytes then widen via
; punpcklwd + punpckldq with zero; SSE4.1/AVX: a single pmovzxwq from memory.
1274define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
1275; SSE2-LABEL: load_zext_2i16_to_2i64:
1276; SSE2:       # BB#0: # %entry
1277; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1278; SSE2-NEXT:    pxor %xmm1, %xmm1
1279; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1280; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1281; SSE2-NEXT:    retq
1282;
1283; SSSE3-LABEL: load_zext_2i16_to_2i64:
1284; SSSE3:       # BB#0: # %entry
1285; SSSE3-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
1286; SSSE3-NEXT:    pxor %xmm1, %xmm1
1287; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1288; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1289; SSSE3-NEXT:    retq
1290;
1291; SSE41-LABEL: load_zext_2i16_to_2i64:
1292; SSE41:       # BB#0: # %entry
1293; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1294; SSE41-NEXT:    retq
1295;
1296; AVX-LABEL: load_zext_2i16_to_2i64:
1297; AVX:       # BB#0: # %entry
1298; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1299; AVX-NEXT:    retq
1300entry:
1301 %X = load <2 x i16>, <2 x i16>* %ptr
1302 %Y = zext <2 x i16> %X to <2 x i64>
1303 ret <2 x i64> %Y
1304}
1305
; Zext load <4 x i16> -> <4 x i32>. SSE2/SSSE3: movq the 8 bytes then
; punpcklwd with zero; SSE4.1/AVX: a single pmovzxwd from memory.
1306define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
1307; SSE2-LABEL: load_zext_4i16_to_4i32:
1308; SSE2:       # BB#0: # %entry
1309; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1310; SSE2-NEXT:    pxor %xmm1, %xmm1
1311; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1312; SSE2-NEXT:    retq
1313;
1314; SSSE3-LABEL: load_zext_4i16_to_4i32:
1315; SSSE3:       # BB#0: # %entry
1316; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1317; SSSE3-NEXT:    pxor %xmm1, %xmm1
1318; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1319; SSSE3-NEXT:    retq
1320;
1321; SSE41-LABEL: load_zext_4i16_to_4i32:
1322; SSE41:       # BB#0: # %entry
1323; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1324; SSE41-NEXT:    retq
1325;
1326; AVX-LABEL: load_zext_4i16_to_4i32:
1327; AVX:       # BB#0: # %entry
1328; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1329; AVX-NEXT:    retq
1330entry:
1331 %X = load <4 x i16>, <4 x i16>* %ptr
1332 %Y = zext <4 x i16> %X to <4 x i32>
1333 ret <4 x i32> %Y
1334}
1335
; Zext load <4 x i16> -> <4 x i64>. SSE2/SSSE3: movq then widen twice against
; zero (punpcklwd, then punpckldq/punpckhdq); SSE4.1/AVX1: two pmovzxwq
; (AVX1 merges via vinsertf128); AVX2/AVX512: one ymm vpmovzxwq.
1336define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
1337; SSE2-LABEL: load_zext_4i16_to_4i64:
1338; SSE2:       # BB#0: # %entry
1339; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1340; SSE2-NEXT:    pxor %xmm2, %xmm2
1341; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1342; SSE2-NEXT:    movdqa %xmm1, %xmm0
1343; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1344; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1345; SSE2-NEXT:    retq
1346;
1347; SSSE3-LABEL: load_zext_4i16_to_4i64:
1348; SSSE3:       # BB#0: # %entry
1349; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
1350; SSSE3-NEXT:    pxor %xmm2, %xmm2
1351; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1352; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1353; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1354; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1355; SSSE3-NEXT:    retq
1356;
1357; SSE41-LABEL: load_zext_4i16_to_4i64:
1358; SSE41:       # BB#0: # %entry
1359; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1360; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1361; SSE41-NEXT:    retq
1362;
1363; AVX1-LABEL: load_zext_4i16_to_4i64:
1364; AVX1:       # BB#0: # %entry
1365; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1366; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
1367; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1368; AVX1-NEXT:    retq
1369;
1370; AVX2-LABEL: load_zext_4i16_to_4i64:
1371; AVX2:       # BB#0: # %entry
1372; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1373; AVX2-NEXT:    retq
1374;
1375; AVX512-LABEL: load_zext_4i16_to_4i64:
1376; AVX512:       # BB#0: # %entry
1377; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
1378; AVX512-NEXT:    retq
1379entry:
1380 %X = load <4 x i16>, <4 x i16>* %ptr
1381 %Y = zext <4 x i16> %X to <4 x i64>
1382 ret <4 x i64> %Y
1383}
1384
; Zext load <8 x i16> -> <8 x i32>. SSE2/SSSE3: punpcklwd/punpckhwd with zero;
; SSE4.1/AVX1: two pmovzxwd (AVX1 merges via vinsertf128); AVX2/AVX512: one
; ymm vpmovzxwd folding the load.
1385define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
1386; SSE2-LABEL: load_zext_8i16_to_8i32:
1387; SSE2:       # BB#0: # %entry
1388; SSE2-NEXT:    movdqa (%rdi), %xmm1
1389; SSE2-NEXT:    pxor %xmm2, %xmm2
1390; SSE2-NEXT:    movdqa %xmm1, %xmm0
1391; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1392; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1393; SSE2-NEXT:    retq
1394;
1395; SSSE3-LABEL: load_zext_8i16_to_8i32:
1396; SSSE3:       # BB#0: # %entry
1397; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1398; SSSE3-NEXT:    pxor %xmm2, %xmm2
1399; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1400; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1401; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1402; SSSE3-NEXT:    retq
1403;
1404; SSE41-LABEL: load_zext_8i16_to_8i32:
1405; SSE41:       # BB#0: # %entry
1406; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1407; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1408; SSE41-NEXT:    retq
1409;
1410; AVX1-LABEL: load_zext_8i16_to_8i32:
1411; AVX1:       # BB#0: # %entry
1412; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1413; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1414; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1415; AVX1-NEXT:    retq
1416;
1417; AVX2-LABEL: load_zext_8i16_to_8i32:
1418; AVX2:       # BB#0: # %entry
1419; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1420; AVX2-NEXT:    retq
1421;
1422; AVX512-LABEL: load_zext_8i16_to_8i32:
1423; AVX512:       # BB#0: # %entry
1424; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
1425; AVX512-NEXT:    retq
1426entry:
1427 %X = load <8 x i16>, <8 x i16>* %ptr
1428 %Y = zext <8 x i16> %X to <8 x i32>
1429 ret <8 x i32> %Y
1430}
1431
; Zext load <2 x i32> -> <2 x i64>. SSE2/SSSE3: movq then punpckldq with zero;
; SSE4.1/AVX: a single pmovzxdq from memory.
1432define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
1433; SSE2-LABEL: load_zext_2i32_to_2i64:
1434; SSE2:       # BB#0: # %entry
1435; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1436; SSE2-NEXT:    pxor %xmm1, %xmm1
1437; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1438; SSE2-NEXT:    retq
1439;
1440; SSSE3-LABEL: load_zext_2i32_to_2i64:
1441; SSSE3:       # BB#0: # %entry
1442; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
1443; SSSE3-NEXT:    pxor %xmm1, %xmm1
1444; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1445; SSSE3-NEXT:    retq
1446;
1447; SSE41-LABEL: load_zext_2i32_to_2i64:
1448; SSE41:       # BB#0: # %entry
1449; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1450; SSE41-NEXT:    retq
1451;
1452; AVX-LABEL: load_zext_2i32_to_2i64:
1453; AVX:       # BB#0: # %entry
1454; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1455; AVX-NEXT:    retq
1456entry:
1457 %X = load <2 x i32>, <2 x i32>* %ptr
1458 %Y = zext <2 x i32> %X to <2 x i64>
1459 ret <2 x i64> %Y
1460}
1461
; Zext load <4 x i32> -> <4 x i64>. SSE2/SSSE3: punpckldq/punpckhdq with zero;
; SSE4.1/AVX1: two pmovzxdq (AVX1 merges via vinsertf128); AVX2/AVX512: one
; ymm vpmovzxdq folding the load.
1462define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
1463; SSE2-LABEL: load_zext_4i32_to_4i64:
1464; SSE2:       # BB#0: # %entry
1465; SSE2-NEXT:    movdqa (%rdi), %xmm1
1466; SSE2-NEXT:    pxor %xmm2, %xmm2
1467; SSE2-NEXT:    movdqa %xmm1, %xmm0
1468; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1469; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1470; SSE2-NEXT:    retq
1471;
1472; SSSE3-LABEL: load_zext_4i32_to_4i64:
1473; SSSE3:       # BB#0: # %entry
1474; SSSE3-NEXT:    movdqa (%rdi), %xmm1
1475; SSSE3-NEXT:    pxor %xmm2, %xmm2
1476; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1477; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1478; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1479; SSSE3-NEXT:    retq
1480;
1481; SSE41-LABEL: load_zext_4i32_to_4i64:
1482; SSE41:       # BB#0: # %entry
1483; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1484; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1485; SSE41-NEXT:    retq
1486;
1487; AVX1-LABEL: load_zext_4i32_to_4i64:
1488; AVX1:       # BB#0: # %entry
1489; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
1490; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
1491; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1492; AVX1-NEXT:    retq
1493;
1494; AVX2-LABEL: load_zext_4i32_to_4i64:
1495; AVX2:       # BB#0: # %entry
1496; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1497; AVX2-NEXT:    retq
1498;
1499; AVX512-LABEL: load_zext_4i32_to_4i64:
1500; AVX512:       # BB#0: # %entry
1501; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
1502; AVX512-NEXT:    retq
1503entry:
1504 %X = load <4 x i32>, <4 x i32>* %ptr
1505 %Y = zext <4 x i32> %X to <4 x i64>
1506 ret <4 x i64> %Y
1507}
1508
; In-register zext <8 x i8> -> <8 x i32>: the <8 x i8> arrives as words in
; xmm0, so every target first masks with pand ({{.*}}(%rip) = byte-mask
; constant pool load), then widens word->dword (punpcklwd/punpckhwd on SSE,
; pmovzxwd on SSE4.1+, a single ymm vpmovzxwd on AVX2/AVX512).
1509define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
1510; SSE2-LABEL: zext_8i8_to_8i32:
1511; SSE2:       # BB#0: # %entry
1512; SSE2-NEXT:    movdqa %xmm0, %xmm1
1513; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1514; SSE2-NEXT:    pxor %xmm2, %xmm2
1515; SSE2-NEXT:    movdqa %xmm1, %xmm0
1516; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1517; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1518; SSE2-NEXT:    retq
1519;
1520; SSSE3-LABEL: zext_8i8_to_8i32:
1521; SSSE3:       # BB#0: # %entry
1522; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1523; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1524; SSSE3-NEXT:    pxor %xmm2, %xmm2
1525; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1526; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1527; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1528; SSSE3-NEXT:    retq
1529;
1530; SSE41-LABEL: zext_8i8_to_8i32:
1531; SSE41:       # BB#0: # %entry
1532; SSE41-NEXT:    movdqa %xmm0, %xmm1
1533; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
1534; SSE41-NEXT:    pxor %xmm2, %xmm2
1535; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1536; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1537; SSE41-NEXT:    retq
1538;
1539; AVX1-LABEL: zext_8i8_to_8i32:
1540; AVX1:       # BB#0: # %entry
1541; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1542; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1543; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1544; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1545; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1546; AVX1-NEXT:    retq
1547;
1548; AVX2-LABEL: zext_8i8_to_8i32:
1549; AVX2:       # BB#0: # %entry
1550; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1551; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1552; AVX2-NEXT:    retq
1553;
1554; AVX512-LABEL: zext_8i8_to_8i32:
1555; AVX512:       # BB#0: # %entry
1556; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
1557; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1558; AVX512-NEXT:    retq
1559entry:
1560  %t = zext <8 x i8> %z to <8 x i32>
1561  ret <8 x i32> %t
1562}
1563
; Shuffle-with-zero + bitcast pattern that should be recognized as a zext:
; interleaving %A with zeroinitializer and bitcasting <16 x i16> -> <8 x i32>
; must lower the same as zext <8 x i16> -> <8 x i32> (punpck*wd with zero on
; SSE, pmovzxwd on SSE4.1+, one ymm vpmovzxwd on AVX2/AVX512).
1564define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
1565; SSE2-LABEL: shuf_zext_8i16_to_8i32:
1566; SSE2:       # BB#0: # %entry
1567; SSE2-NEXT:    movdqa %xmm0, %xmm1
1568; SSE2-NEXT:    pxor %xmm2, %xmm2
1569; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1570; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1571; SSE2-NEXT:    retq
1572;
1573; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
1574; SSSE3:       # BB#0: # %entry
1575; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1576; SSSE3-NEXT:    pxor %xmm2, %xmm2
1577; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1578; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1579; SSSE3-NEXT:    retq
1580;
1581; SSE41-LABEL: shuf_zext_8i16_to_8i32:
1582; SSE41:       # BB#0: # %entry
1583; SSE41-NEXT:    movdqa %xmm0, %xmm1
1584; SSE41-NEXT:    pxor %xmm2, %xmm2
1585; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1586; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1587; SSE41-NEXT:    retq
1588;
1589; AVX1-LABEL: shuf_zext_8i16_to_8i32:
1590; AVX1:       # BB#0: # %entry
1591; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1592; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1593; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1594; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1595; AVX1-NEXT:    retq
1596;
1597; AVX2-LABEL: shuf_zext_8i16_to_8i32:
1598; AVX2:       # BB#0: # %entry
1599; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1600; AVX2-NEXT:    retq
1601;
1602; AVX512-LABEL: shuf_zext_8i16_to_8i32:
1603; AVX512:       # BB#0: # %entry
1604; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1605; AVX512-NEXT:    retq
1606entry:
1607  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
1608  %Z = bitcast <16 x i16> %B to <8 x i32>
1609  ret <8 x i32> %Z
1610}
1611
; Shuffle-with-zero + bitcast recognized as zext <4 x i32> -> <4 x i64>.
; SSE lowers via punpckldq/punpckhdq (pmovzxdq for the low half on SSE4.1);
; AVX1 builds the high half with a blend+permilps against zero; AVX2/AVX512
; use a single ymm vpmovzxdq.
1612define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
1613; SSE2-LABEL: shuf_zext_4i32_to_4i64:
1614; SSE2:       # BB#0: # %entry
1615; SSE2-NEXT:    movdqa %xmm0, %xmm1
1616; SSE2-NEXT:    pxor %xmm2, %xmm2
1617; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1618; SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1619; SSE2-NEXT:    retq
1620;
1621; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
1622; SSSE3:       # BB#0: # %entry
1623; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1624; SSSE3-NEXT:    pxor %xmm2, %xmm2
1625; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1626; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1627; SSSE3-NEXT:    retq
1628;
1629; SSE41-LABEL: shuf_zext_4i32_to_4i64:
1630; SSE41:       # BB#0: # %entry
1631; SSE41-NEXT:    movdqa %xmm0, %xmm1
1632; SSE41-NEXT:    pxor %xmm2, %xmm2
1633; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
1634; SSE41-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1635; SSE41-NEXT:    retq
1636;
1637; AVX1-LABEL: shuf_zext_4i32_to_4i64:
1638; AVX1:       # BB#0: # %entry
1639; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
1640; AVX1-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
1641; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
1642; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
1643; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1644; AVX1-NEXT:    retq
1645;
1646; AVX2-LABEL: shuf_zext_4i32_to_4i64:
1647; AVX2:       # BB#0: # %entry
1648; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1649; AVX2-NEXT:    retq
1650;
1651; AVX512-LABEL: shuf_zext_4i32_to_4i64:
1652; AVX512:       # BB#0: # %entry
1653; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1654; AVX512-NEXT:    retq
1655entry:
1656  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
1657  %Z = bitcast <8 x i32> %B to <4 x i64>
1658  ret <4 x i64> %Z
1659}
1660
; Shuffle-with-zero + bitcast recognized as zext <8 x i8> -> <8 x i32>, with
; the i8 source held in word lanes so it must first be packed to bytes (pand+
; packuswb on SSE2, pshufb on SSSE3+/AVX) before widening with punpck*/pmovzxbd
; (single ymm vpmovzxbd on AVX2/AVX512).
1661define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
1662; SSE2-LABEL: shuf_zext_8i8_to_8i32:
1663; SSE2:       # BB#0: # %entry
1664; SSE2-NEXT:    movdqa %xmm0, %xmm1
1665; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
1666; SSE2-NEXT:    packuswb %xmm1, %xmm1
1667; SSE2-NEXT:    pxor %xmm2, %xmm2
1668; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1669; SSE2-NEXT:    movdqa %xmm1, %xmm0
1670; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1671; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1672; SSE2-NEXT:    retq
1673;
1674; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
1675; SSSE3:       # BB#0: # %entry
1676; SSSE3-NEXT:    movdqa %xmm0, %xmm1
1677; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm1
1678; SSSE3-NEXT:    pxor %xmm2, %xmm2
1679; SSSE3-NEXT:    movdqa %xmm1, %xmm0
1680; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1681; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
1682; SSSE3-NEXT:    retq
1683;
1684; SSE41-LABEL: shuf_zext_8i8_to_8i32:
1685; SSE41:       # BB#0: # %entry
1686; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1687; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1688; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1689; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1690; SSE41-NEXT:    movdqa %xmm2, %xmm0
1691; SSE41-NEXT:    retq
1692;
1693; AVX1-LABEL: shuf_zext_8i8_to_8i32:
1694; AVX1:       # BB#0: # %entry
1695; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1696; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1697; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1698; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1699; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1700; AVX1-NEXT:    retq
1701;
1702; AVX2-LABEL: shuf_zext_8i8_to_8i32:
1703; AVX2:       # BB#0: # %entry
1704; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1705; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1706; AVX2-NEXT:    retq
1707;
1708; AVX512-LABEL: shuf_zext_8i8_to_8i32:
1709; AVX512:       # BB#0: # %entry
1710; AVX512-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1711; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1712; AVX512-NEXT:    retq
1713entry:
1714  %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
1715  %Z = bitcast <32 x i8> %B to <8 x i32>
1716  ret <8 x i32> %Z
1717}
1718
; Shuffle-with-zero + bitcast = zext of bytes 6-7 of %A to <2 x i64>
; (offset source lanes). SSE2 widens via three punpck* against zero;
; SSSE3 uses one pshufb; SSE4.1/AVX shift the bytes down with psrlq $48
; then use pmovzxbq.
1719define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
1720; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
1721; SSE2:       # BB#0: # %entry
1722; SSE2-NEXT:    pxor %xmm1, %xmm1
1723; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1724; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1725; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1726; SSE2-NEXT:    retq
1727;
1728; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
1729; SSSE3:       # BB#0: # %entry
1730; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
1731; SSSE3-NEXT:    retq
1732;
1733; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
1734; SSE41:       # BB#0: # %entry
1735; SSE41-NEXT:    psrlq $48, %xmm0
1736; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1737; SSE41-NEXT:    retq
1738;
1739; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
1740; AVX:       # BB#0: # %entry
1741; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
1742; AVX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
1743; AVX-NEXT:    retq
1744entry:
1745  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1746  %Z = bitcast <16 x i8> %B to <2 x i64>
1747  ret <2 x i64> %Z
1748}
1749
; Zero-extension of bytes 11..14 of %A into the four i64 lanes of a 256-bit
; result, expressed as a shuffle with a zero vector. 256-bit results are split
; into two 128-bit halves pre-AVX2; AVX2/AVX512 can shift and use a single
; 256-bit pmovzx. Autogenerated checks; regenerate rather than hand-edit.
define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    retq
entry:
  ; Bytes 11,12,13,14 of %A, each followed by seven zero bytes (index 16 is
  ; the zero vector); bitcast reinterprets the 256 bits as <4 x i64>.
  %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %Z = bitcast <32 x i8> %B to <4 x i64>
  ret <4 x i64> %Z
}
1806
; Zero-extension of i16 elements 3 and 4 of %A (byte offset 6) into the two
; i64 lanes, expressed as a shuffle with a zero vector. SSSE3 folds the whole
; operation into one pshufb; SSE4.1/AVX shift the source down first and then
; use the word-to-quad pmovzx form. Autogenerated checks; regenerate rather
; than hand-edit.
define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT:    retq
entry:
  ; Words 3 and 4 of %A, each padded with three zero words (index 8 selects
  ; the zero vector); bitcast reinterprets the 128 bits as <2 x i64>.
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
  %Z = bitcast <8 x i16> %B to <2 x i64>
  ret <2 x i64> %Z
}
1837
; Zero-extension of i16 elements 2..5 of %A into the four i64 lanes of a
; 256-bit result. Pre-SSE4.1 this is pure unpack-with-zero sequences split
; across two registers; SSE4.1/AVX1 reshuffle each half into position before
; a word-to-quad pmovzx; AVX2/AVX512 do one shuffle plus a 256-bit pmovzx.
; Autogenerated checks; regenerate rather than hand-edit.
define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX2-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
; AVX512-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    retq
entry:
  ; Words 2,3,4,5 of %A, each padded with three zero words (index 8 selects
  ; the zero vector); bitcast reinterprets the 256 bits as <4 x i64>.
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
  %Z = bitcast <16 x i16> %B to <4 x i64>
  ret <4 x i64> %Z
}
1893
; Zero-extension of i16 elements 1..4 of %A into four i32 lanes. All feature
; levels first shift the vector down by one word; pre-SSE4.1 targets then
; interleave with zero, while SSE4.1/AVX use the word-to-dword pmovzx.
; Autogenerated checks; regenerate rather than hand-edit.
define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT:    retq
;
; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT:    retq
entry:
  ; Words 1..4 of %A each followed by a zero word (index 8 selects the zero
  ; vector); bitcast reinterprets the 128 bits as <4 x i32>.
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
  %Z = bitcast <8 x i16> %B to <4 x i32>
  ret <4 x i32> %Z
}
1925
; Zero-extension of i16 elements starting at offset 3 into eight i32 lanes,
; with the source lanes beyond element 7 undef. 128-bit targets materialize
; the result as two registers; AVX2/AVX512 shift once and use a single
; 256-bit word-to-dword pmovzx. Autogenerated checks; regenerate rather than
; hand-edit.
define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  ; Words 3..7 of %A each followed by a zero word; the trailing three source
  ; positions are undef so the lowering may produce anything there. Bitcast
  ; reinterprets the 256 bits as <8 x i32>.
  %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
1979
; Zero-extension of the upper eight i16 elements (8..15, two of them undef)
; of a 256-bit source into eight i32 lanes. 128-bit targets operate directly
; on the high input register; AVX targets first extract the upper 128-bit
; half, then AVX2/AVX512 finish with a single 256-bit pmovzx. Autogenerated
; checks; regenerate rather than hand-edit.
define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pxor %xmm2, %xmm2
; SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE41-NEXT:    movdqa %xmm2, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
entry:
  ; Words 8..15 of %A (13 and 15 undef), each followed by a zero word (index
  ; 16 selects the zero vector); bitcast reinterprets as <8 x i32>.
  %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
  %Z = bitcast <16 x i16> %B to <8 x i32>
  ret <8 x i32> %Z
}
2032
; Zero-extension of the upper two i32 elements of %A into two i64 lanes. This
; maps directly onto a single high-unpack with a zeroed register on every
; feature level. Autogenerated checks; regenerate rather than hand-edit.
define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT:    retq
;
; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT:    retq
entry:
  ; Dwords 2 and 3 of %A each followed by a zero dword (index 4 selects the
  ; zero vector); bitcast reinterprets the 128 bits as <2 x i64>.
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
  %Z = bitcast <4 x i32> %B to <2 x i64>
  ret <2 x i64> %Z
}
2050
; Zero-extension of i32 elements at offset 1 into four i64 lanes, where the
; first and last extended lanes are undef (only elements 2 and 3 matter).
; 128-bit targets mask/blend one half and shift the other; AVX2/AVX512 use a
; shuffle followed by a single 256-bit dword-to-quad pmovzx. Autogenerated
; checks; regenerate rather than hand-edit.
define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pxor %xmm0, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT:    retq
;
; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3]
; AVX1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    retq
entry:
  ; Dwords undef,2,3,undef each followed by a zero dword (index 4 selects the
  ; zero vector); bitcast reinterprets the 256 bits as <4 x i64>.
  %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
  %Z = bitcast <8 x i32> %B to <4 x i64>
  ret <4 x i64> %Z
}
2100