; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;
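; Each 'a' test below extracts every element, truncates it to half its
; width and zero-extends it back, so the whole pattern should fold to a
; single mask of the upper half of each element (an and with a constant
; or a blend with zero).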
define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64a:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64a:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64a:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0,   i64 %ext1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi64a:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi64a:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0,   i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1,   i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2,   i64 %ext3, i32 3
  ret <4 x i64> %v3
}

define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32a:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32a:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0,   i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1,   i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2,   i32 %ext3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32a:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0,   i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1,   i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2,   i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3,   i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4,   i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5,   i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6,   i32 %ext7, i32 7
  ret <8 x i32> %v7
}

define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0,   i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1,   i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2,   i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3,   i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4,   i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5,   i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6,   i16 %ext7, i32 7
  ret <8 x i16> %v7
}

define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16a:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %x0  = extractelement <16 x i16> %0, i32 0
  %x1  = extractelement <16 x i16> %0, i32 1
  %x2  = extractelement <16 x i16> %0, i32 2
  %x3  = extractelement <16 x i16> %0, i32 3
  %x4  = extractelement <16 x i16> %0, i32 4
  %x5  = extractelement <16 x i16> %0, i32 5
  %x6  = extractelement <16 x i16> %0, i32 6
  %x7  = extractelement <16 x i16> %0, i32 7
  %x8  = extractelement <16 x i16> %0, i32 8
  %x9  = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0  = trunc i16 %x0  to i8
  %trunc1  = trunc i16 %x1  to i8
  %trunc2  = trunc i16 %x2  to i8
  %trunc3  = trunc i16 %x3  to i8
  %trunc4  = trunc i16 %x4  to i8
  %trunc5  = trunc i16 %x5  to i8
  %trunc6  = trunc i16 %x6  to i8
  %trunc7  = trunc i16 %x7  to i8
  %trunc8  = trunc i16 %x8  to i8
  %trunc9  = trunc i16 %x9  to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0  = zext i8 %trunc0  to i16
  %ext1  = zext i8 %trunc1  to i16
  %ext2  = zext i8 %trunc2  to i16
  %ext3  = zext i8 %trunc3  to i16
  %ext4  = zext i8 %trunc4  to i16
  %ext5  = zext i8 %trunc5  to i16
  %ext6  = zext i8 %trunc6  to i16
  %ext7  = zext i8 %trunc7  to i16
  %ext8  = zext i8 %trunc8  to i16
  %ext9  = zext i8 %trunc9  to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0  = insertelement <16 x i16> undef, i16 %ext0,  i32 0
  %v1  = insertelement <16 x i16> %v0,   i16 %ext1,  i32 1
  %v2  = insertelement <16 x i16> %v1,   i16 %ext2,  i32 2
  %v3  = insertelement <16 x i16> %v2,   i16 %ext3,  i32 3
  %v4  = insertelement <16 x i16> %v3,   i16 %ext4,  i32 4
  %v5  = insertelement <16 x i16> %v4,   i16 %ext5,  i32 5
  %v6  = insertelement <16 x i16> %v5,   i16 %ext6,  i32 6
  %v7  = insertelement <16 x i16> %v6,   i16 %ext7,  i32 7
  %v8  = insertelement <16 x i16> %v7,   i16 %ext8,  i32 8
  %v9  = insertelement <16 x i16> %v8,   i16 %ext9,  i32 9
  %v10 = insertelement <16 x i16> %v9,   i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10,  i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11,  i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12,  i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13,  i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14,  i16 %ext15, i32 15
  ret <16 x i16> %v15
}
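; For i8 elements the round trip goes through i4, which has no legal
; scalar type: note how SSE currently scalarizes the vector through the
; stack before masking, while a single vandps with a mask of 15 in each
; byte suffices under AVX.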
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0  = extractelement <16 x i8> %0, i32 0
  %x1  = extractelement <16 x i8> %0, i32 1
  %x2  = extractelement <16 x i8> %0, i32 2
  %x3  = extractelement <16 x i8> %0, i32 3
  %x4  = extractelement <16 x i8> %0, i32 4
  %x5  = extractelement <16 x i8> %0, i32 5
  %x6  = extractelement <16 x i8> %0, i32 6
  %x7  = extractelement <16 x i8> %0, i32 7
  %x8  = extractelement <16 x i8> %0, i32 8
  %x9  = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0  = trunc i8 %x0  to i4
  %trunc1  = trunc i8 %x1  to i4
  %trunc2  = trunc i8 %x2  to i4
  %trunc3  = trunc i8 %x3  to i4
  %trunc4  = trunc i8 %x4  to i4
  %trunc5  = trunc i8 %x5  to i4
  %trunc6  = trunc i8 %x6  to i4
  %trunc7  = trunc i8 %x7  to i4
  %trunc8  = trunc i8 %x8  to i4
  %trunc9  = trunc i8 %x9  to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0  = zext i4 %trunc0  to i8
  %ext1  = zext i4 %trunc1  to i8
  %ext2  = zext i4 %trunc2  to i8
  %ext3  = zext i4 %trunc3  to i8
  %ext4  = zext i4 %trunc4  to i8
  %ext5  = zext i4 %trunc5  to i8
  %ext6  = zext i4 %trunc6  to i8
  %ext7  = zext i4 %trunc7  to i8
  %ext8  = zext i4 %trunc8  to i8
  %ext9  = zext i4 %trunc9  to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0  = insertelement <16 x i8> undef, i8 %ext0,  i32 0
  %v1  = insertelement <16 x i8> %v0,   i8 %ext1,  i32 1
  %v2  = insertelement <16 x i8> %v1,   i8 %ext2,  i32 2
  %v3  = insertelement <16 x i8> %v2,   i8 %ext3,  i32 3
  %v4  = insertelement <16 x i8> %v3,   i8 %ext4,  i32 4
  %v5  = insertelement <16 x i8> %v4,   i8 %ext5,  i32 5
  %v6  = insertelement <16 x i8> %v5,   i8 %ext6,  i32 6
  %v7  = insertelement <16 x i8> %v6,   i8 %ext7,  i32 7
  %v8  = insertelement <16 x i8> %v7,   i8 %ext8,  i32 8
  %v9  = insertelement <16 x i8> %v8,   i8 %ext9,  i32 9
  %v10 = insertelement <16 x i8> %v9,   i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10,  i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11,  i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12,  i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13,  i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14,  i8 %ext15, i32 15
  ret <16 x i8> %v15
}

define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm5
; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm4
; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    movd %eax, %xmm6
; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper32xi8a:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %x0  = extractelement <32 x i8> %0, i32 0
  %x1  = extractelement <32 x i8> %0, i32 1
  %x2  = extractelement <32 x i8> %0, i32 2
  %x3  = extractelement <32 x i8> %0, i32 3
  %x4  = extractelement <32 x i8> %0, i32 4
  %x5  = extractelement <32 x i8> %0, i32 5
  %x6  = extractelement <32 x i8> %0, i32 6
  %x7  = extractelement <32 x i8> %0, i32 7
  %x8  = extractelement <32 x i8> %0, i32 8
  %x9  = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0  = trunc i8 %x0  to i4
  %trunc1  = trunc i8 %x1  to i4
  %trunc2  = trunc i8 %x2  to i4
  %trunc3  = trunc i8 %x3  to i4
  %trunc4  = trunc i8 %x4  to i4
  %trunc5  = trunc i8 %x5  to i4
  %trunc6  = trunc i8 %x6  to i4
  %trunc7  = trunc i8 %x7  to i4
  %trunc8  = trunc i8 %x8  to i4
  %trunc9  = trunc i8 %x9  to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0  = zext i4 %trunc0  to i8
  %ext1  = zext i4 %trunc1  to i8
  %ext2  = zext i4 %trunc2  to i8
  %ext3  = zext i4 %trunc3  to i8
  %ext4  = zext i4 %trunc4  to i8
  %ext5  = zext i4 %trunc5  to i8
  %ext6  = zext i4 %trunc6  to i8
  %ext7  = zext i4 %trunc7  to i8
  %ext8  = zext i4 %trunc8  to i8
  %ext9  = zext i4 %trunc9  to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0  = insertelement <32 x i8> undef, i8 %ext0,  i32 0
  %v1  = insertelement <32 x i8> %v0,   i8 %ext1,  i32 1
  %v2  = insertelement <32 x i8> %v1,   i8 %ext2,  i32 2
  %v3  = insertelement <32 x i8> %v2,   i8 %ext3,  i32 3
  %v4  = insertelement <32 x i8> %v3,   i8 %ext4,  i32 4
  %v5  = insertelement <32 x i8> %v4,   i8 %ext5,  i32 5
  %v6  = insertelement <32 x i8> %v5,   i8 %ext6,  i32 6
  %v7  = insertelement <32 x i8> %v6,   i8 %ext7,  i32 7
  %v8  = insertelement <32 x i8> %v7,   i8 %ext8,  i32 8
  %v9  = insertelement <32 x i8> %v8,   i8 %ext9,  i32 9
  %v10 = insertelement <32 x i8> %v9,   i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10,  i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11,  i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12,  i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13,  i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14,  i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15,  i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16,  i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17,  i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18,  i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19,  i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20,  i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21,  i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22,  i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23,  i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24,  i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25,  i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26,  i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27,  i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28,  i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29,  i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30,  i8 %ext31, i32 31
  ret <32 x i8> %v31
}
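; The 'b' tests express the same clearing as a bitcast to elements of
; half the width followed by inserting zero into every odd lane.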
define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64b:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0,  i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64b:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi64b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi64b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0,  i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1,  i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2,  i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0,  i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1,  i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2,  i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32b:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0,  i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1,  i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2,  i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3,  i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4,  i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5,  i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6,  i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper16xi16b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper16xi16b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0  = insertelement <32 x i8> %x8,  i8 zeroinitializer, i32 1
  %r1  = insertelement <32 x i8> %r0,  i8 zeroinitializer, i32 3
  %r2  = insertelement <32 x i8> %r1,  i8 zeroinitializer, i32 5
  %r3  = insertelement <32 x i8> %r2,  i8 zeroinitializer, i32 7
  %r4  = insertelement <32 x i8> %r3,  i8 zeroinitializer, i32 9
  %r5  = insertelement <32 x i8> %r4,  i8 zeroinitializer, i32 11
  %r6  = insertelement <32 x i8> %r5,  i8 zeroinitializer, i32 13
  %r7  = insertelement <32 x i8> %r6,  i8 zeroinitializer, i32 15
  %r8  = insertelement <32 x i8> %r7,  i8 zeroinitializer, i32 17
  %r9  = insertelement <32 x i8> %r8,  i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9,  i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8b:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    movq %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %r8
; SSE-NEXT:    movq %rcx, %r9
; SSE-NEXT:    movq %rcx, %r10
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %xmm1, %rcx
; SSE-NEXT:    shrq $56, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r11
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r14
; SSE-NEXT:    shrq $40, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    shrq $24, %r10
; SSE-NEXT:    andb $15, %r10b
; SSE-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    shrq $16, %r9
; SSE-NEXT:    andb $15, %r9b
; SSE-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    andb $15, %r8b
; SSE-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rbx
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $56, %rbx
; SSE-NEXT:    andb $15, %bl
; SSE-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $40, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $24, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $16, %r14
; SSE-NEXT:    andb $15, %r14b
; SSE-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $8, %r11
; SSE-NEXT:    andb $15, %r11b
; SSE-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX:       # BB#0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %r15
; AVX-NEXT:    pushq %r14
; AVX-NEXT:    pushq %r13
; AVX-NEXT:    pushq %r12
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movq %rcx, %r8
; AVX-NEXT:    movq %rcx, %r9
; AVX-NEXT:    movq %rcx, %r10
; AVX-NEXT:    movq %rcx, %r11
; AVX-NEXT:    movq %rcx, %r14
; AVX-NEXT:    movq %rcx, %r15
; AVX-NEXT:    movq %rdx, %r12
; AVX-NEXT:    movq %rdx, %r13
; AVX-NEXT:    movq %rdx, %rdi
; AVX-NEXT:    movq %rdx, %rax
; AVX-NEXT:    movq %rdx, %rsi
; AVX-NEXT:    movq %rdx, %rbx
; AVX-NEXT:    movq %rdx, %rbp
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    andb $15, %cl
; AVX-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rbp
; AVX-NEXT:    andb $15, %bpl
; AVX-NEXT:    movb %bpl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %rbx
; AVX-NEXT:    andb $15, %bl
; AVX-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %rsi
; AVX-NEXT:    andb $15, %sil
; AVX-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %rax
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %rdi
; AVX-NEXT:    andb $15, %dil
; AVX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r13
; AVX-NEXT:    andb $15, %r13b
; AVX-NEXT:    movb %r13b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r12
; AVX-NEXT:    andb $15, %r12b
; AVX-NEXT:    movb %r12b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rdx
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %r15
; AVX-NEXT:    andb $15, %r15b
; AVX-NEXT:    movb %r15b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %r14
; AVX-NEXT:    andb $15, %r14b
; AVX-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %r11
; AVX-NEXT:    andb $15, %r11b
; AVX-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %r10
; AVX-NEXT:    andb $15, %r10b
; AVX-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r9
; AVX-NEXT:    andb $15, %r9b
; AVX-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r8
; AVX-NEXT:    andb $15, %r8b
; AVX-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %r12
; AVX-NEXT:    popq %r13
; AVX-NEXT:    popq %r14
; AVX-NEXT:    popq %r15
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  %x4  = bitcast <16 x i8> %0 to <32 x i4>
  %r0  = insertelement <32 x i4> %x4,  i4 zeroinitializer, i32 1
  %r1  = insertelement <32 x i4> %r0,  i4 zeroinitializer, i32 3
  %r2  = insertelement <32 x i4> %r1,  i4 zeroinitializer, i32 5
  %r3  = insertelement <32 x i4> %r2,  i4 zeroinitializer, i32 7
  %r4  = insertelement <32 x i4> %r3,  i4 zeroinitializer, i32 9
  %r5  = insertelement <32 x i4> %r4,  i4 zeroinitializer, i32 11
  %r6  = insertelement <32 x i4> %r5,  i4 zeroinitializer, i32 13
  %r7  = insertelement <32 x i4> %r6,  i4 zeroinitializer, i32 15
  %r8  = insertelement <32 x i4> %r7,  i4 zeroinitializer, i32 17
  %r9  = insertelement <32 x i4> %r8,  i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9,  i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8b:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    movq %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %r8
; SSE-NEXT:    movq %rcx, %r9
; SSE-NEXT:    movq %rcx, %r10
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %xmm2, %rcx
; SSE-NEXT:    shrq $56, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r11
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r14
; SSE-NEXT:    shrq $40, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    shrq $24, %r10
; SSE-NEXT:    andb $15, %r10b
; SSE-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    shrq $16, %r9
; SSE-NEXT:    andb $15, %r9b
; SSE-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    andb $15, %r8b
; SSE-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rbx
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $56, %rbx
; SSE-NEXT:    andb $15, %bl
; SSE-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $40, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $24, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $16, %r14
; SSE-NEXT:    andb $15, %r14b
; SSE-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $8, %r11
; SSE-NEXT:    andb $15, %r11b
; SSE-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper32xi8b:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %r14
; AVX1-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX1-NEXT:    movq %rdx, %r8
; AVX1-NEXT:    movq %rdx, %r9
; AVX1-NEXT:    movq %rdx, %r11
; AVX1-NEXT:    movq %rdx, %rsi
; AVX1-NEXT:    movq %rdx, %rdi
; AVX1-NEXT:    movq %rdx, %rcx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    andb $15, %dl
; AVX1-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $56, %rax
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %r10
; AVX1-NEXT:    shrq $48, %rcx
; AVX1-NEXT:    andb $15, %cl
; AVX1-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rdx
; AVX1-NEXT:    shrq $40, %rdi
; AVX1-NEXT:    andb $15, %dil
; AVX1-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rax
; AVX1-NEXT:    shrq $32, %rsi
; AVX1-NEXT:    andb $15, %sil
; AVX1-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rcx
; AVX1-NEXT:    shrq $24, %r11
; AVX1-NEXT:    andb $15, %r11b
; AVX1-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rsi
; AVX1-NEXT:    shrq $16, %r9
; AVX1-NEXT:    andb $15, %r9b
; AVX1-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rdi
; AVX1-NEXT:    shrq $8, %r8
; AVX1-NEXT:    andb $15, %r8b
; AVX1-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %r14, %rbx
; AVX1-NEXT:    andb $15, %r14b
; AVX1-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $8, %r10
; AVX1-NEXT:    shrq $16, %rdx
; AVX1-NEXT:    shrq $24, %rax
; AVX1-NEXT:    shrq $32, %rcx
; AVX1-NEXT:    shrq $40, %rsi
; AVX1-NEXT:    shrq $48, %rdi
; AVX1-NEXT:    shrq $56, %rbx
; AVX1-NEXT:    andb $15, %bl
; AVX1-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %dil
; AVX1-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %sil
; AVX1-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %cl
; AVX1-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %dl
; AVX1-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r10b
; AVX1-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %r8
; AVX1-NEXT:    movq %rax, %rdx
; AVX1-NEXT:    movq %rax, %rsi
; AVX1-NEXT:    movq %rax, %rdi
; AVX1-NEXT:    movl %eax, %ebx
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT:    shrl $16, %ecx
; AVX1-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    shrl $24, %ebx
; AVX1-NEXT:    vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX1-NEXT:    shrq $32, %rdi
; AVX1-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT:    shrq $40, %rsi
; AVX1-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
; AVX1-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX1-NEXT:    shrq $48, %rdx
; AVX1-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    shrq $56, %r8
; AVX1-NEXT:    vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $8, %ecx
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $16, %ecx
; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $24, %ecx
; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $32, %rcx
; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $40, %rcx
; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $48, %rcx
; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm2, %rcx
; AVX1-NEXT:    shrq $56, %rax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    vmovd %ecx, %xmm1
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $16, %eax
; AVX1-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $24, %eax
; AVX1-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $32, %rax
; AVX1-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $40, %rax
; AVX1-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $48, %rax
; AVX1-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
; AVX1-NEXT:    shrq $56, %rcx
; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $8, %ecx
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $16, %ecx
; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $24, %ecx
; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $32, %rcx
; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $40, %rcx
; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $48, %rcx
; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    shrq $56, %rax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper32xi8b:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %r14
; AVX2-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX2-NEXT:    movq %rdx, %r8
; AVX2-NEXT:    movq %rdx, %r9
; AVX2-NEXT:    movq %rdx, %r11
; AVX2-NEXT:    movq %rdx, %rsi
; AVX2-NEXT:    movq %rdx, %rdi
; AVX2-NEXT:    movq %rdx, %rcx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    andb $15, %dl
; AVX2-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $56, %rax
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %r10
; AVX2-NEXT:    shrq $48, %rcx
; AVX2-NEXT:    andb $15, %cl
; AVX2-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rdx
; AVX2-NEXT:    shrq $40, %rdi
; AVX2-NEXT:    andb $15, %dil
; AVX2-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rax
; AVX2-NEXT:    shrq $32, %rsi
; AVX2-NEXT:    andb $15, %sil
; AVX2-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rcx
; AVX2-NEXT:    shrq $24, %r11
; AVX2-NEXT:    andb $15, %r11b
; AVX2-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rsi
; AVX2-NEXT:    shrq $16, %r9
; AVX2-NEXT:    andb $15, %r9b
; AVX2-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rdi
; AVX2-NEXT:    shrq $8, %r8
; AVX2-NEXT:    andb $15, %r8b
; AVX2-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %r14, %rbx
; AVX2-NEXT:    andb $15, %r14b
; AVX2-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $8, %r10
; AVX2-NEXT:    shrq $16, %rdx
; AVX2-NEXT:    shrq $24, %rax
; AVX2-NEXT:    shrq $32, %rcx
; AVX2-NEXT:    shrq $40, %rsi
; AVX2-NEXT:    shrq $48, %rdi
; AVX2-NEXT:    shrq $56, %rbx
; AVX2-NEXT:    andb $15, %bl
; AVX2-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %dil
; AVX2-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %sil
; AVX2-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %cl
; AVX2-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %dl
; AVX2-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r10b
; AVX2-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    movq %rax, %r8
; AVX2-NEXT:    movq %rax, %rdx
; AVX2-NEXT:    movq %rax, %rsi
; AVX2-NEXT:    movq %rax, %rdi
; AVX2-NEXT:    movl %eax, %ebx
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    shrl $24, %ebx
; AVX2-NEXT:    vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX2-NEXT:    shrq $32, %rdi
; AVX2-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT:    shrq $40, %rsi
; AVX2-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
; AVX2-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX2-NEXT:    shrq $48, %rdx
; AVX2-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    shrq $56, %r8
; AVX2-NEXT:    vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $8, %ecx
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $24, %ecx
; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $32, %rcx
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $40, %rcx
; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $48, %rcx
; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm2, %rcx
; AVX2-NEXT:    shrq $56, %rax
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    vmovd %ecx, %xmm1
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $24, %eax
; AVX2-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $32, %rax
; AVX2-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $40, %rax
; AVX2-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $48, %rax
; AVX2-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
1363; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
1364; AVX2-NEXT:    shrq $56, %rcx
1365; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
1366; AVX2-NEXT:    movl %eax, %ecx
1367; AVX2-NEXT:    shrl $8, %ecx
1368; AVX2-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
1369; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
1370; AVX2-NEXT:    movl %eax, %ecx
1371; AVX2-NEXT:    shrl $16, %ecx
1372; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
1373; AVX2-NEXT:    movl %eax, %ecx
1374; AVX2-NEXT:    shrl $24, %ecx
1375; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
1376; AVX2-NEXT:    movq %rax, %rcx
1377; AVX2-NEXT:    shrq $32, %rcx
1378; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
1379; AVX2-NEXT:    movq %rax, %rcx
1380; AVX2-NEXT:    shrq $40, %rcx
1381; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
1382; AVX2-NEXT:    movq %rax, %rcx
1383; AVX2-NEXT:    shrq $48, %rcx
1384; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
1385; AVX2-NEXT:    shrq $56, %rax
1386; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
1387; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1388; AVX2-NEXT:    popq %rbx
1389; AVX2-NEXT:    popq %r14
1390; AVX2-NEXT:    retq
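; The IR below bitcasts the input to <64 x i4> and zeroes every odd-indexed
; nibble, i.e. the upper 4 bits of each byte. Ideally this would fold to a
; single vector 'and', as in the 'c' variants below.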
1391  %x4  = bitcast <32 x i8> %0 to <64 x i4>
1392  %r0  = insertelement <64 x i4> %x4,  i4 zeroinitializer, i32 1
1393  %r1  = insertelement <64 x i4> %r0,  i4 zeroinitializer, i32 3
1394  %r2  = insertelement <64 x i4> %r1,  i4 zeroinitializer, i32 5
1395  %r3  = insertelement <64 x i4> %r2,  i4 zeroinitializer, i32 7
1396  %r4  = insertelement <64 x i4> %r3,  i4 zeroinitializer, i32 9
1397  %r5  = insertelement <64 x i4> %r4,  i4 zeroinitializer, i32 11
1398  %r6  = insertelement <64 x i4> %r5,  i4 zeroinitializer, i32 13
1399  %r7  = insertelement <64 x i4> %r6,  i4 zeroinitializer, i32 15
1400  %r8  = insertelement <64 x i4> %r7,  i4 zeroinitializer, i32 17
1401  %r9  = insertelement <64 x i4> %r8,  i4 zeroinitializer, i32 19
1402  %r10 = insertelement <64 x i4> %r9,  i4 zeroinitializer, i32 21
1403  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
1404  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
1405  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
1406  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
1407  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
1408  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
1409  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
1410  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
1411  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
1412  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
1413  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
1414  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
1415  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
1416  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
1417  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
1418  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
1419  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
1420  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
1421  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
1422  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
1423  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  %r = bitcast <64 x i4> %r31 to <32 x i8>
  ret <32 x i8> %r
}

define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper32xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <32 x i8> %r
}

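; The 'c' variants express the clear-upper-bits pattern directly as an 'and'
; with a constant splat mask. Here the upper 32 bits of each i64 are cleared:
; SSE folds the mask load into andps, while AVX blends with a zero vector.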
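; 256-bit version of the i64 mask: SSE applies the same xmm mask to each
; 128-bit half, AVX uses a single ymm blend against zero.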
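; Clear the upper 16 bits of each i32 element (mask 0xFFFF per element).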
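; 256-bit version of the i32 mask. This needs word-granularity blending,
; which AVX1 lacks at 256 bits, so AVX1 falls back to vandps with a
; constant-pool mask.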
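; Clear the upper 8 bits of each i16 element (mask 0xFF). Byte-granularity
; blending has no immediate form, so both SSE and AVX use a plain 'and'.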
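; 256-bit version of the i16 mask.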
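; Clear the upper 4 bits of each i8 element (mask 0xF). Sub-byte masking can
; only be expressed as an 'and'.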
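; 256-bit version of the i8 mask.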