1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5
6;
7; PR6455 'Clear Upper Bits' Patterns
8;
9
; Clears the upper 32 bits of each i64 lane of a <2 x i64> vector. Every
; element is extracted, truncated to i32, zero-extended back to i64 and
; reinserted at the same index.
; The expectations below are a single andps with a constant on the SSE prefix,
; and a zeroed register blended into the odd 32-bit positions on AVX1/AVX2.
10define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
11; SSE-LABEL: _clearupper2xi64a:
12; SSE:       # BB#0:
13; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
14; SSE-NEXT:    retq
15;
16; AVX1-LABEL: _clearupper2xi64a:
17; AVX1:       # BB#0:
18; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
19; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
20; AVX1-NEXT:    retq
21;
22; AVX2-LABEL: _clearupper2xi64a:
23; AVX2:       # BB#0:
24; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
25; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
26; AVX2-NEXT:    retq
; Per-element round trip: extract -> trunc to i32 -> zext to i64 -> insert.
27  %x0 = extractelement <2 x i64> %0, i32 0
28  %x1 = extractelement <2 x i64> %0, i32 1
29  %trunc0 = trunc i64 %x0 to i32
30  %trunc1 = trunc i64 %x1 to i32
31  %ext0 = zext i32 %trunc0 to i64
32  %ext1 = zext i32 %trunc1 to i64
33  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
34  %v1 = insertelement <2 x i64> %v0,   i64 %ext1, i32 1
35  ret <2 x i64> %v1
36}
37
; Clears the upper 32 bits of each i64 lane of a <4 x i64> vector. Every
; element is extracted, truncated to i32, zero-extended back to i64 and
; reinserted at the same index.
; On the SSE prefix the 256-bit value occupies xmm0 and xmm1, and the
; expectations apply one shared [4294967295,4294967295] mask to both halves.
; The AVX prefixes expect a blend with a zeroed ymm register instead.
38define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
39; SSE-LABEL: _clearupper4xi64a:
40; SSE:       # BB#0:
41; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,4294967295]
42; SSE-NEXT:    andps %xmm2, %xmm0
43; SSE-NEXT:    andps %xmm2, %xmm1
44; SSE-NEXT:    retq
45;
46; AVX1-LABEL: _clearupper4xi64a:
47; AVX1:       # BB#0:
48; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
49; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
50; AVX1-NEXT:    retq
51;
52; AVX2-LABEL: _clearupper4xi64a:
53; AVX2:       # BB#0:
54; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
55; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
56; AVX2-NEXT:    retq
; Per-element round trip: extract -> trunc to i32 -> zext to i64 -> insert.
57  %x0 = extractelement <4 x i64> %0, i32 0
58  %x1 = extractelement <4 x i64> %0, i32 1
59  %x2 = extractelement <4 x i64> %0, i32 2
60  %x3 = extractelement <4 x i64> %0, i32 3
61  %trunc0 = trunc i64 %x0 to i32
62  %trunc1 = trunc i64 %x1 to i32
63  %trunc2 = trunc i64 %x2 to i32
64  %trunc3 = trunc i64 %x3 to i32
65  %ext0 = zext i32 %trunc0 to i64
66  %ext1 = zext i32 %trunc1 to i64
67  %ext2 = zext i32 %trunc2 to i64
68  %ext3 = zext i32 %trunc3 to i64
69  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
70  %v1 = insertelement <4 x i64> %v0,   i64 %ext1, i32 1
71  %v2 = insertelement <4 x i64> %v1,   i64 %ext2, i32 2
72  %v3 = insertelement <4 x i64> %v2,   i64 %ext3, i32 3
73  ret <4 x i64> %v3
74}
75
; Clears the upper 16 bits of each i32 lane of a <4 x i32> vector. Every
; element is extracted, truncated to i16, zero-extended back to i32 and
; reinserted at the same index.
; The expectations are a single andps with a constant on the SSE prefix, and a
; word blend against a zeroed register on the shared AVX prefix.
76define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
77; SSE-LABEL: _clearupper4xi32a:
78; SSE:       # BB#0:
79; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
80; SSE-NEXT:    retq
81;
82; AVX-LABEL: _clearupper4xi32a:
83; AVX:       # BB#0:
84; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
85; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
86; AVX-NEXT:    retq
; Per-element round trip: extract -> trunc to i16 -> zext to i32 -> insert.
87  %x0 = extractelement <4 x i32> %0, i32 0
88  %x1 = extractelement <4 x i32> %0, i32 1
89  %x2 = extractelement <4 x i32> %0, i32 2
90  %x3 = extractelement <4 x i32> %0, i32 3
91  %trunc0 = trunc i32 %x0 to i16
92  %trunc1 = trunc i32 %x1 to i16
93  %trunc2 = trunc i32 %x2 to i16
94  %trunc3 = trunc i32 %x3 to i16
95  %ext0 = zext i16 %trunc0 to i32
96  %ext1 = zext i16 %trunc1 to i32
97  %ext2 = zext i16 %trunc2 to i32
98  %ext3 = zext i16 %trunc3 to i32
99  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
100  %v1 = insertelement <4 x i32> %v0,   i32 %ext1, i32 1
101  %v2 = insertelement <4 x i32> %v1,   i32 %ext2, i32 2
102  %v3 = insertelement <4 x i32> %v2,   i32 %ext3, i32 3
103  ret <4 x i32> %v3
104}
105
; Clears the upper 16 bits of each i32 lane of an <8 x i32> vector. Every
; element is extracted, truncated to i16, zero-extended back to i32 and
; reinserted at the same index.
; The expectations differ per prefix: SSE masks both xmm halves with a shared
; [65535 x 4] constant, AVX1 uses one 256-bit vandps with a constant, and AVX2
; uses a 256-bit word blend against a zeroed register.
106define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
107; SSE-LABEL: _clearupper8xi32a:
108; SSE:       # BB#0:
109; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
110; SSE-NEXT:    andps %xmm2, %xmm0
111; SSE-NEXT:    andps %xmm2, %xmm1
112; SSE-NEXT:    retq
113;
114; AVX1-LABEL: _clearupper8xi32a:
115; AVX1:       # BB#0:
116; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
117; AVX1-NEXT:    retq
118;
119; AVX2-LABEL: _clearupper8xi32a:
120; AVX2:       # BB#0:
121; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
122; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
123; AVX2-NEXT:    retq
; Per-element round trip: extract -> trunc to i16 -> zext to i32 -> insert.
124  %x0 = extractelement <8 x i32> %0, i32 0
125  %x1 = extractelement <8 x i32> %0, i32 1
126  %x2 = extractelement <8 x i32> %0, i32 2
127  %x3 = extractelement <8 x i32> %0, i32 3
128  %x4 = extractelement <8 x i32> %0, i32 4
129  %x5 = extractelement <8 x i32> %0, i32 5
130  %x6 = extractelement <8 x i32> %0, i32 6
131  %x7 = extractelement <8 x i32> %0, i32 7
132  %trunc0 = trunc i32 %x0 to i16
133  %trunc1 = trunc i32 %x1 to i16
134  %trunc2 = trunc i32 %x2 to i16
135  %trunc3 = trunc i32 %x3 to i16
136  %trunc4 = trunc i32 %x4 to i16
137  %trunc5 = trunc i32 %x5 to i16
138  %trunc6 = trunc i32 %x6 to i16
139  %trunc7 = trunc i32 %x7 to i16
140  %ext0 = zext i16 %trunc0 to i32
141  %ext1 = zext i16 %trunc1 to i32
142  %ext2 = zext i16 %trunc2 to i32
143  %ext3 = zext i16 %trunc3 to i32
144  %ext4 = zext i16 %trunc4 to i32
145  %ext5 = zext i16 %trunc5 to i32
146  %ext6 = zext i16 %trunc6 to i32
147  %ext7 = zext i16 %trunc7 to i32
148  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
149  %v1 = insertelement <8 x i32> %v0,   i32 %ext1, i32 1
150  %v2 = insertelement <8 x i32> %v1,   i32 %ext2, i32 2
151  %v3 = insertelement <8 x i32> %v2,   i32 %ext3, i32 3
152  %v4 = insertelement <8 x i32> %v3,   i32 %ext4, i32 4
153  %v5 = insertelement <8 x i32> %v4,   i32 %ext5, i32 5
154  %v6 = insertelement <8 x i32> %v5,   i32 %ext6, i32 6
155  %v7 = insertelement <8 x i32> %v6,   i32 %ext7, i32 7
156  ret <8 x i32> %v7
157}
158
; Clears the upper 8 bits of each i16 lane of an <8 x i16> vector. Every
; element is extracted, truncated to i8, zero-extended back to i16 and
; reinserted at the same index.
; Both the SSE and AVX expectations are a single bitwise and with a constant
; loaded from memory.
159define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
160; SSE-LABEL: _clearupper8xi16a:
161; SSE:       # BB#0:
162; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
163; SSE-NEXT:    retq
164;
165; AVX-LABEL: _clearupper8xi16a:
166; AVX:       # BB#0:
167; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
168; AVX-NEXT:    retq
; Per-element round trip: extract -> trunc to i8 -> zext to i16 -> insert.
169  %x0 = extractelement <8 x i16> %0, i32 0
170  %x1 = extractelement <8 x i16> %0, i32 1
171  %x2 = extractelement <8 x i16> %0, i32 2
172  %x3 = extractelement <8 x i16> %0, i32 3
173  %x4 = extractelement <8 x i16> %0, i32 4
174  %x5 = extractelement <8 x i16> %0, i32 5
175  %x6 = extractelement <8 x i16> %0, i32 6
176  %x7 = extractelement <8 x i16> %0, i32 7
177  %trunc0 = trunc i16 %x0 to i8
178  %trunc1 = trunc i16 %x1 to i8
179  %trunc2 = trunc i16 %x2 to i8
180  %trunc3 = trunc i16 %x3 to i8
181  %trunc4 = trunc i16 %x4 to i8
182  %trunc5 = trunc i16 %x5 to i8
183  %trunc6 = trunc i16 %x6 to i8
184  %trunc7 = trunc i16 %x7 to i8
185  %ext0 = zext i8 %trunc0 to i16
186  %ext1 = zext i8 %trunc1 to i16
187  %ext2 = zext i8 %trunc2 to i16
188  %ext3 = zext i8 %trunc3 to i16
189  %ext4 = zext i8 %trunc4 to i16
190  %ext5 = zext i8 %trunc5 to i16
191  %ext6 = zext i8 %trunc6 to i16
192  %ext7 = zext i8 %trunc7 to i16
193  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
194  %v1 = insertelement <8 x i16> %v0,   i16 %ext1, i32 1
195  %v2 = insertelement <8 x i16> %v1,   i16 %ext2, i32 2
196  %v3 = insertelement <8 x i16> %v2,   i16 %ext3, i32 3
197  %v4 = insertelement <8 x i16> %v3,   i16 %ext4, i32 4
198  %v5 = insertelement <8 x i16> %v4,   i16 %ext5, i32 5
199  %v6 = insertelement <8 x i16> %v5,   i16 %ext6, i32 6
200  %v7 = insertelement <8 x i16> %v6,   i16 %ext7, i32 7
201  ret <8 x i16> %v7
202}
203
; Clears the upper 8 bits of each i16 lane of a <16 x i16> vector. Every
; element is extracted, truncated to i8, zero-extended back to i16 and
; reinserted at the same index.
; The SSE expectations mask both xmm halves with one shared [255 x 8]
; constant; the AVX expectations use a single 256-bit vandps with a constant.
204define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
205; SSE-LABEL: _clearupper16xi16a:
206; SSE:       # BB#0:
207; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
208; SSE-NEXT:    andps %xmm2, %xmm0
209; SSE-NEXT:    andps %xmm2, %xmm1
210; SSE-NEXT:    retq
211;
212; AVX-LABEL: _clearupper16xi16a:
213; AVX:       # BB#0:
214; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
215; AVX-NEXT:    retq
; Per-element round trip: extract -> trunc to i8 -> zext to i16 -> insert.
216  %x0  = extractelement <16 x i16> %0, i32 0
217  %x1  = extractelement <16 x i16> %0, i32 1
218  %x2  = extractelement <16 x i16> %0, i32 2
219  %x3  = extractelement <16 x i16> %0, i32 3
220  %x4  = extractelement <16 x i16> %0, i32 4
221  %x5  = extractelement <16 x i16> %0, i32 5
222  %x6  = extractelement <16 x i16> %0, i32 6
223  %x7  = extractelement <16 x i16> %0, i32 7
224  %x8  = extractelement <16 x i16> %0, i32 8
225  %x9  = extractelement <16 x i16> %0, i32 9
226  %x10 = extractelement <16 x i16> %0, i32 10
227  %x11 = extractelement <16 x i16> %0, i32 11
228  %x12 = extractelement <16 x i16> %0, i32 12
229  %x13 = extractelement <16 x i16> %0, i32 13
230  %x14 = extractelement <16 x i16> %0, i32 14
231  %x15 = extractelement <16 x i16> %0, i32 15
232  %trunc0  = trunc i16 %x0  to i8
233  %trunc1  = trunc i16 %x1  to i8
234  %trunc2  = trunc i16 %x2  to i8
235  %trunc3  = trunc i16 %x3  to i8
236  %trunc4  = trunc i16 %x4  to i8
237  %trunc5  = trunc i16 %x5  to i8
238  %trunc6  = trunc i16 %x6  to i8
239  %trunc7  = trunc i16 %x7  to i8
240  %trunc8  = trunc i16 %x8  to i8
241  %trunc9  = trunc i16 %x9  to i8
242  %trunc10 = trunc i16 %x10 to i8
243  %trunc11 = trunc i16 %x11 to i8
244  %trunc12 = trunc i16 %x12 to i8
245  %trunc13 = trunc i16 %x13 to i8
246  %trunc14 = trunc i16 %x14 to i8
247  %trunc15 = trunc i16 %x15 to i8
248  %ext0  = zext i8 %trunc0  to i16
249  %ext1  = zext i8 %trunc1  to i16
250  %ext2  = zext i8 %trunc2  to i16
251  %ext3  = zext i8 %trunc3  to i16
252  %ext4  = zext i8 %trunc4  to i16
253  %ext5  = zext i8 %trunc5  to i16
254  %ext6  = zext i8 %trunc6  to i16
255  %ext7  = zext i8 %trunc7  to i16
256  %ext8  = zext i8 %trunc8  to i16
257  %ext9  = zext i8 %trunc9  to i16
258  %ext10 = zext i8 %trunc10 to i16
259  %ext11 = zext i8 %trunc11 to i16
260  %ext12 = zext i8 %trunc12 to i16
261  %ext13 = zext i8 %trunc13 to i16
262  %ext14 = zext i8 %trunc14 to i16
263  %ext15 = zext i8 %trunc15 to i16
264  %v0  = insertelement <16 x i16> undef, i16 %ext0,  i32 0
265  %v1  = insertelement <16 x i16> %v0,   i16 %ext1,  i32 1
266  %v2  = insertelement <16 x i16> %v1,   i16 %ext2,  i32 2
267  %v3  = insertelement <16 x i16> %v2,   i16 %ext3,  i32 3
268  %v4  = insertelement <16 x i16> %v3,   i16 %ext4,  i32 4
269  %v5  = insertelement <16 x i16> %v4,   i16 %ext5,  i32 5
270  %v6  = insertelement <16 x i16> %v5,   i16 %ext6,  i32 6
271  %v7  = insertelement <16 x i16> %v6,   i16 %ext7,  i32 7
272  %v8  = insertelement <16 x i16> %v7,   i16 %ext8,  i32 8
273  %v9  = insertelement <16 x i16> %v8,   i16 %ext9,  i32 9
274  %v10 = insertelement <16 x i16> %v9,   i16 %ext10, i32 10
275  %v11 = insertelement <16 x i16> %v10,  i16 %ext11, i32 11
276  %v12 = insertelement <16 x i16> %v11,  i16 %ext12, i32 12
277  %v13 = insertelement <16 x i16> %v12,  i16 %ext13, i32 13
278  %v14 = insertelement <16 x i16> %v13,  i16 %ext14, i32 14
279  %v15 = insertelement <16 x i16> %v14,  i16 %ext15, i32 15
280  ret <16 x i16> %v15
281}
282
; Clears the upper 4 bits of each i8 lane of a <16 x i8> vector. Every element
; is extracted, truncated to i4, zero-extended back to i8 and reinserted at the
; same index.
; NOTE(review) the SSE expectations below record a long scalarized sequence:
; the vector is spilled to the stack, each byte is reloaded with movzbl/movd,
; the bytes are recombined through a punpcklbw/wd/dq/qdq tree, and only then is
; one pand applied. The AVX expectations are a single vandps with a constant,
; so the SSE sequence looks like a missed optimization being tracked here
; rather than desired output - confirm before treating it as the goal.
283define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
284; SSE-LABEL: _clearupper16xi8a:
285; SSE:       # BB#0:
286; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
287; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
288; SSE-NEXT:    movd %eax, %xmm0
289; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
290; SSE-NEXT:    movd %eax, %xmm1
291; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
292; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
293; SSE-NEXT:    movd %eax, %xmm0
294; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
295; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
296; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
297; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
298; SSE-NEXT:    movd %eax, %xmm0
299; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
300; SSE-NEXT:    movd %eax, %xmm3
301; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
302; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
303; SSE-NEXT:    movd %eax, %xmm0
304; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
305; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
306; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
307; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
308; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
309; SSE-NEXT:    movd %eax, %xmm0
310; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
311; SSE-NEXT:    movd %eax, %xmm2
312; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
313; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
314; SSE-NEXT:    movd %eax, %xmm0
315; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
316; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
317; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
318; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
319; SSE-NEXT:    movd %eax, %xmm0
320; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
321; SSE-NEXT:    movd %eax, %xmm2
322; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
323; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
324; SSE-NEXT:    movd %eax, %xmm4
325; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
326; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
327; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
328; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
329; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
330; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
331; SSE-NEXT:    retq
332;
333; AVX-LABEL: _clearupper16xi8a:
334; AVX:       # BB#0:
335; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
336; AVX-NEXT:    retq
; Per-element round trip: extract -> trunc to i4 -> zext to i8 -> insert.
; i4 is narrower than any x86 register type, unlike the wider variants.
337  %x0  = extractelement <16 x i8> %0, i32 0
338  %x1  = extractelement <16 x i8> %0, i32 1
339  %x2  = extractelement <16 x i8> %0, i32 2
340  %x3  = extractelement <16 x i8> %0, i32 3
341  %x4  = extractelement <16 x i8> %0, i32 4
342  %x5  = extractelement <16 x i8> %0, i32 5
343  %x6  = extractelement <16 x i8> %0, i32 6
344  %x7  = extractelement <16 x i8> %0, i32 7
345  %x8  = extractelement <16 x i8> %0, i32 8
346  %x9  = extractelement <16 x i8> %0, i32 9
347  %x10 = extractelement <16 x i8> %0, i32 10
348  %x11 = extractelement <16 x i8> %0, i32 11
349  %x12 = extractelement <16 x i8> %0, i32 12
350  %x13 = extractelement <16 x i8> %0, i32 13
351  %x14 = extractelement <16 x i8> %0, i32 14
352  %x15 = extractelement <16 x i8> %0, i32 15
353  %trunc0  = trunc i8 %x0  to i4
354  %trunc1  = trunc i8 %x1  to i4
355  %trunc2  = trunc i8 %x2  to i4
356  %trunc3  = trunc i8 %x3  to i4
357  %trunc4  = trunc i8 %x4  to i4
358  %trunc5  = trunc i8 %x5  to i4
359  %trunc6  = trunc i8 %x6  to i4
360  %trunc7  = trunc i8 %x7  to i4
361  %trunc8  = trunc i8 %x8  to i4
362  %trunc9  = trunc i8 %x9  to i4
363  %trunc10 = trunc i8 %x10 to i4
364  %trunc11 = trunc i8 %x11 to i4
365  %trunc12 = trunc i8 %x12 to i4
366  %trunc13 = trunc i8 %x13 to i4
367  %trunc14 = trunc i8 %x14 to i4
368  %trunc15 = trunc i8 %x15 to i4
369  %ext0  = zext i4 %trunc0  to i8
370  %ext1  = zext i4 %trunc1  to i8
371  %ext2  = zext i4 %trunc2  to i8
372  %ext3  = zext i4 %trunc3  to i8
373  %ext4  = zext i4 %trunc4  to i8
374  %ext5  = zext i4 %trunc5  to i8
375  %ext6  = zext i4 %trunc6  to i8
376  %ext7  = zext i4 %trunc7  to i8
377  %ext8  = zext i4 %trunc8  to i8
378  %ext9  = zext i4 %trunc9  to i8
379  %ext10 = zext i4 %trunc10 to i8
380  %ext11 = zext i4 %trunc11 to i8
381  %ext12 = zext i4 %trunc12 to i8
382  %ext13 = zext i4 %trunc13 to i8
383  %ext14 = zext i4 %trunc14 to i8
384  %ext15 = zext i4 %trunc15 to i8
385  %v0  = insertelement <16 x i8> undef, i8 %ext0,  i32 0
386  %v1  = insertelement <16 x i8> %v0,   i8 %ext1,  i32 1
387  %v2  = insertelement <16 x i8> %v1,   i8 %ext2,  i32 2
388  %v3  = insertelement <16 x i8> %v2,   i8 %ext3,  i32 3
389  %v4  = insertelement <16 x i8> %v3,   i8 %ext4,  i32 4
390  %v5  = insertelement <16 x i8> %v4,   i8 %ext5,  i32 5
391  %v6  = insertelement <16 x i8> %v5,   i8 %ext6,  i32 6
392  %v7  = insertelement <16 x i8> %v6,   i8 %ext7,  i32 7
393  %v8  = insertelement <16 x i8> %v7,   i8 %ext8,  i32 8
394  %v9  = insertelement <16 x i8> %v8,   i8 %ext9,  i32 9
395  %v10 = insertelement <16 x i8> %v9,   i8 %ext10, i32 10
396  %v11 = insertelement <16 x i8> %v10,  i8 %ext11, i32 11
397  %v12 = insertelement <16 x i8> %v11,  i8 %ext12, i32 12
398  %v13 = insertelement <16 x i8> %v12,  i8 %ext13, i32 13
399  %v14 = insertelement <16 x i8> %v13,  i8 %ext14, i32 14
400  %v15 = insertelement <16 x i8> %v14,  i8 %ext15, i32 15
401  ret <16 x i8> %v15
402}
403
; Clears the upper 4 bits of each i8 lane of a <32 x i8> vector. Every element
; is extracted, truncated to i4, zero-extended back to i8 and reinserted at the
; same index.
; NOTE(review) the SSE expectations below record the same long scalarized
; sequence twice, once per xmm half: both halves are spilled to the stack, each
; byte is reloaded with movzbl/movd and recombined through an unpack tree, and
; each rebuilt half is finally masked with a shared constant of sixteen 15s.
; The AVX expectations are a single 256-bit vandps with a constant, so the SSE
; sequence looks like a missed optimization being tracked here rather than
; desired output - confirm before treating it as the goal.
404define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
405; SSE-LABEL: _clearupper32xi8a:
406; SSE:       # BB#0:
407; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
408; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
409; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
410; SSE-NEXT:    movd %eax, %xmm0
411; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
412; SSE-NEXT:    movd %eax, %xmm1
413; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
414; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
415; SSE-NEXT:    movd %eax, %xmm0
416; SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
417; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
418; SSE-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
419; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
420; SSE-NEXT:    movd %eax, %xmm0
421; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
422; SSE-NEXT:    movd %eax, %xmm3
423; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
424; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
425; SSE-NEXT:    movd %eax, %xmm0
426; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
427; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
428; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
429; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
430; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
431; SSE-NEXT:    movd %eax, %xmm0
432; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
433; SSE-NEXT:    movd %eax, %xmm2
434; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
435; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
436; SSE-NEXT:    movd %eax, %xmm0
437; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
438; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
439; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
440; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
441; SSE-NEXT:    movd %eax, %xmm0
442; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
443; SSE-NEXT:    movd %eax, %xmm2
444; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
445; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
446; SSE-NEXT:    movd %eax, %xmm4
447; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
448; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
449; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
450; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
451; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
452; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
453; SSE-NEXT:    pand %xmm2, %xmm0
454; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
455; SSE-NEXT:    movd %eax, %xmm1
456; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
457; SSE-NEXT:    movd %eax, %xmm3
458; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
459; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
460; SSE-NEXT:    movd %eax, %xmm1
461; SSE-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
462; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
463; SSE-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
464; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
465; SSE-NEXT:    movd %eax, %xmm1
466; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
467; SSE-NEXT:    movd %eax, %xmm5
468; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
469; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
470; SSE-NEXT:    movd %eax, %xmm1
471; SSE-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
472; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
473; SSE-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
474; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
475; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
476; SSE-NEXT:    movd %eax, %xmm1
477; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
478; SSE-NEXT:    movd %eax, %xmm4
479; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
480; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
481; SSE-NEXT:    movd %eax, %xmm1
482; SSE-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
483; SSE-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
484; SSE-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
485; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
486; SSE-NEXT:    movd %eax, %xmm1
487; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
488; SSE-NEXT:    movd %eax, %xmm4
489; SSE-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
490; SSE-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
491; SSE-NEXT:    movd %eax, %xmm6
492; SSE-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
493; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
494; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
495; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
496; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
497; SSE-NEXT:    pand %xmm2, %xmm1
498; SSE-NEXT:    retq
499;
500; AVX-LABEL: _clearupper32xi8a:
501; AVX:       # BB#0:
502; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
503; AVX-NEXT:    retq
; Per-element round trip: extract -> trunc to i4 -> zext to i8 -> insert.
; i4 is narrower than any x86 register type, unlike the wider variants.
504  %x0  = extractelement <32 x i8> %0, i32 0
505  %x1  = extractelement <32 x i8> %0, i32 1
506  %x2  = extractelement <32 x i8> %0, i32 2
507  %x3  = extractelement <32 x i8> %0, i32 3
508  %x4  = extractelement <32 x i8> %0, i32 4
509  %x5  = extractelement <32 x i8> %0, i32 5
510  %x6  = extractelement <32 x i8> %0, i32 6
511  %x7  = extractelement <32 x i8> %0, i32 7
512  %x8  = extractelement <32 x i8> %0, i32 8
513  %x9  = extractelement <32 x i8> %0, i32 9
514  %x10 = extractelement <32 x i8> %0, i32 10
515  %x11 = extractelement <32 x i8> %0, i32 11
516  %x12 = extractelement <32 x i8> %0, i32 12
517  %x13 = extractelement <32 x i8> %0, i32 13
518  %x14 = extractelement <32 x i8> %0, i32 14
519  %x15 = extractelement <32 x i8> %0, i32 15
520  %x16 = extractelement <32 x i8> %0, i32 16
521  %x17 = extractelement <32 x i8> %0, i32 17
522  %x18 = extractelement <32 x i8> %0, i32 18
523  %x19 = extractelement <32 x i8> %0, i32 19
524  %x20 = extractelement <32 x i8> %0, i32 20
525  %x21 = extractelement <32 x i8> %0, i32 21
526  %x22 = extractelement <32 x i8> %0, i32 22
527  %x23 = extractelement <32 x i8> %0, i32 23
528  %x24 = extractelement <32 x i8> %0, i32 24
529  %x25 = extractelement <32 x i8> %0, i32 25
530  %x26 = extractelement <32 x i8> %0, i32 26
531  %x27 = extractelement <32 x i8> %0, i32 27
532  %x28 = extractelement <32 x i8> %0, i32 28
533  %x29 = extractelement <32 x i8> %0, i32 29
534  %x30 = extractelement <32 x i8> %0, i32 30
535  %x31 = extractelement <32 x i8> %0, i32 31
536  %trunc0  = trunc i8 %x0  to i4
537  %trunc1  = trunc i8 %x1  to i4
538  %trunc2  = trunc i8 %x2  to i4
539  %trunc3  = trunc i8 %x3  to i4
540  %trunc4  = trunc i8 %x4  to i4
541  %trunc5  = trunc i8 %x5  to i4
542  %trunc6  = trunc i8 %x6  to i4
543  %trunc7  = trunc i8 %x7  to i4
544  %trunc8  = trunc i8 %x8  to i4
545  %trunc9  = trunc i8 %x9  to i4
546  %trunc10 = trunc i8 %x10 to i4
547  %trunc11 = trunc i8 %x11 to i4
548  %trunc12 = trunc i8 %x12 to i4
549  %trunc13 = trunc i8 %x13 to i4
550  %trunc14 = trunc i8 %x14 to i4
551  %trunc15 = trunc i8 %x15 to i4
552  %trunc16 = trunc i8 %x16 to i4
553  %trunc17 = trunc i8 %x17 to i4
554  %trunc18 = trunc i8 %x18 to i4
555  %trunc19 = trunc i8 %x19 to i4
556  %trunc20 = trunc i8 %x20 to i4
557  %trunc21 = trunc i8 %x21 to i4
558  %trunc22 = trunc i8 %x22 to i4
559  %trunc23 = trunc i8 %x23 to i4
560  %trunc24 = trunc i8 %x24 to i4
561  %trunc25 = trunc i8 %x25 to i4
562  %trunc26 = trunc i8 %x26 to i4
563  %trunc27 = trunc i8 %x27 to i4
564  %trunc28 = trunc i8 %x28 to i4
565  %trunc29 = trunc i8 %x29 to i4
566  %trunc30 = trunc i8 %x30 to i4
567  %trunc31 = trunc i8 %x31 to i4
568  %ext0  = zext i4 %trunc0  to i8
569  %ext1  = zext i4 %trunc1  to i8
570  %ext2  = zext i4 %trunc2  to i8
571  %ext3  = zext i4 %trunc3  to i8
572  %ext4  = zext i4 %trunc4  to i8
573  %ext5  = zext i4 %trunc5  to i8
574  %ext6  = zext i4 %trunc6  to i8
575  %ext7  = zext i4 %trunc7  to i8
576  %ext8  = zext i4 %trunc8  to i8
577  %ext9  = zext i4 %trunc9  to i8
578  %ext10 = zext i4 %trunc10 to i8
579  %ext11 = zext i4 %trunc11 to i8
580  %ext12 = zext i4 %trunc12 to i8
581  %ext13 = zext i4 %trunc13 to i8
582  %ext14 = zext i4 %trunc14 to i8
583  %ext15 = zext i4 %trunc15 to i8
584  %ext16 = zext i4 %trunc16 to i8
585  %ext17 = zext i4 %trunc17 to i8
586  %ext18 = zext i4 %trunc18 to i8
587  %ext19 = zext i4 %trunc19 to i8
588  %ext20 = zext i4 %trunc20 to i8
589  %ext21 = zext i4 %trunc21 to i8
590  %ext22 = zext i4 %trunc22 to i8
591  %ext23 = zext i4 %trunc23 to i8
592  %ext24 = zext i4 %trunc24 to i8
593  %ext25 = zext i4 %trunc25 to i8
594  %ext26 = zext i4 %trunc26 to i8
595  %ext27 = zext i4 %trunc27 to i8
596  %ext28 = zext i4 %trunc28 to i8
597  %ext29 = zext i4 %trunc29 to i8
598  %ext30 = zext i4 %trunc30 to i8
599  %ext31 = zext i4 %trunc31 to i8
600  %v0  = insertelement <32 x i8> undef, i8 %ext0,  i32 0
601  %v1  = insertelement <32 x i8> %v0,   i8 %ext1,  i32 1
602  %v2  = insertelement <32 x i8> %v1,   i8 %ext2,  i32 2
603  %v3  = insertelement <32 x i8> %v2,   i8 %ext3,  i32 3
604  %v4  = insertelement <32 x i8> %v3,   i8 %ext4,  i32 4
605  %v5  = insertelement <32 x i8> %v4,   i8 %ext5,  i32 5
606  %v6  = insertelement <32 x i8> %v5,   i8 %ext6,  i32 6
607  %v7  = insertelement <32 x i8> %v6,   i8 %ext7,  i32 7
608  %v8  = insertelement <32 x i8> %v7,   i8 %ext8,  i32 8
609  %v9  = insertelement <32 x i8> %v8,   i8 %ext9,  i32 9
610  %v10 = insertelement <32 x i8> %v9,   i8 %ext10, i32 10
611  %v11 = insertelement <32 x i8> %v10,  i8 %ext11, i32 11
612  %v12 = insertelement <32 x i8> %v11,  i8 %ext12, i32 12
613  %v13 = insertelement <32 x i8> %v12,  i8 %ext13, i32 13
614  %v14 = insertelement <32 x i8> %v13,  i8 %ext14, i32 14
615  %v15 = insertelement <32 x i8> %v14,  i8 %ext15, i32 15
616  %v16 = insertelement <32 x i8> %v15,  i8 %ext16, i32 16
617  %v17 = insertelement <32 x i8> %v16,  i8 %ext17, i32 17
618  %v18 = insertelement <32 x i8> %v17,  i8 %ext18, i32 18
619  %v19 = insertelement <32 x i8> %v18,  i8 %ext19, i32 19
620  %v20 = insertelement <32 x i8> %v19,  i8 %ext20, i32 20
621  %v21 = insertelement <32 x i8> %v20,  i8 %ext21, i32 21
622  %v22 = insertelement <32 x i8> %v21,  i8 %ext22, i32 22
623  %v23 = insertelement <32 x i8> %v22,  i8 %ext23, i32 23
624  %v24 = insertelement <32 x i8> %v23,  i8 %ext24, i32 24
625  %v25 = insertelement <32 x i8> %v24,  i8 %ext25, i32 25
626  %v26 = insertelement <32 x i8> %v25,  i8 %ext26, i32 26
627  %v27 = insertelement <32 x i8> %v26,  i8 %ext27, i32 27
628  %v28 = insertelement <32 x i8> %v27,  i8 %ext28, i32 28
629  %v29 = insertelement <32 x i8> %v28,  i8 %ext29, i32 29
630  %v30 = insertelement <32 x i8> %v29,  i8 %ext30, i32 30
631  %v31 = insertelement <32 x i8> %v30,  i8 %ext31, i32 31
632  ret <32 x i8> %v31
633}
634
; Clears the upper 32 bits of each i64 lane of a <2 x i64> vector using a
; bitcast form: the input is reinterpreted as <4 x i32>, elements 1 and 3 are
; overwritten with zero, and the result is bitcast back to <2 x i64>.
; On the little-endian x86-64 target used by this test, the odd i32 elements
; are the high halves of the i64 lanes.
; The expectations are a single andps with a constant on the SSE prefix and a
; blend against a zeroed register on AVX1/AVX2.
635define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
636; SSE-LABEL: _clearupper2xi64b:
637; SSE:       # BB#0:
638; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
639; SSE-NEXT:    retq
640;
641; AVX1-LABEL: _clearupper2xi64b:
642; AVX1:       # BB#0:
643; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
644; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
645; AVX1-NEXT:    retq
646;
647; AVX2-LABEL: _clearupper2xi64b:
648; AVX2:       # BB#0:
649; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
650; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
651; AVX2-NEXT:    retq
; "i32 zeroinitializer" is simply the scalar i32 value 0.
652  %x32 = bitcast <2 x i64> %0 to <4 x i32>
653  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
654  %r1 = insertelement <4 x i32> %r0,  i32 zeroinitializer, i32 3
655  %r = bitcast <4 x i32> %r1 to <2 x i64>
656  ret <2 x i64> %r
657}
658
; Clears the upper 32 bits of each i64 lane of a <4 x i64> vector using a
; bitcast form: the input is reinterpreted as <8 x i32>, elements 1, 3, 5 and
; 7 are overwritten with zero, and the result is bitcast back to <4 x i64>.
; On the little-endian x86-64 target used by this test, the odd i32 elements
; are the high halves of the i64 lanes.
; The SSE expectations mask both xmm halves with a shared constant printed as
; four i32 values, [4294967295,0,4294967295,0]. The AVX prefixes expect a blend
; against a zeroed ymm register.
659define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
660; SSE-LABEL: _clearupper4xi64b:
661; SSE:       # BB#0:
662; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
663; SSE-NEXT:    andps %xmm2, %xmm0
664; SSE-NEXT:    andps %xmm2, %xmm1
665; SSE-NEXT:    retq
666;
667; AVX1-LABEL: _clearupper4xi64b:
668; AVX1:       # BB#0:
669; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
670; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
671; AVX1-NEXT:    retq
672;
673; AVX2-LABEL: _clearupper4xi64b:
674; AVX2:       # BB#0:
675; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
676; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
677; AVX2-NEXT:    retq
; "i32 zeroinitializer" is simply the scalar i32 value 0.
678  %x32 = bitcast <4 x i64> %0 to <8 x i32>
679  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
680  %r1 = insertelement <8 x i32> %r0,  i32 zeroinitializer, i32 3
681  %r2 = insertelement <8 x i32> %r1,  i32 zeroinitializer, i32 5
682  %r3 = insertelement <8 x i32> %r2,  i32 zeroinitializer, i32 7
683  %r = bitcast <8 x i32> %r3 to <4 x i64>
684  ret <4 x i64> %r
685}
686
; Clear the upper 16 bits of every i32 lane by viewing the input as
; <8 x i16> and writing zero into the odd lanes (the high halves on
; little-endian x86_64).
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  ; View each i32 as a pair of i16 halves.
  %halves = bitcast <4 x i32> %0 to <8 x i16>
  ; Zero the odd (high-half) lanes; the value suffix is the lane index.
  %z1 = insertelement <8 x i16> %halves, i16 0, i32 1
  %z3 = insertelement <8 x i16> %z1, i16 0, i32 3
  %z5 = insertelement <8 x i16> %z3, i16 0, i32 5
  %z7 = insertelement <8 x i16> %z5, i16 0, i32 7
  %cleared = bitcast <8 x i16> %z7 to <4 x i32>
  ret <4 x i32> %cleared
}
706
; 256-bit variant: clear the upper 16 bits of every i32 lane by viewing the
; input as <16 x i16> and writing zero into the odd lanes (the high halves on
; little-endian x86_64).
define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32b:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  ; View each i32 as a pair of i16 halves.
  %halves = bitcast <8 x i32> %0 to <16 x i16>
  ; Zero the odd (high-half) lanes; the value suffix is the lane index.
  %z1 = insertelement <16 x i16> %halves, i16 0, i32 1
  %z3 = insertelement <16 x i16> %z1, i16 0, i32 3
  %z5 = insertelement <16 x i16> %z3, i16 0, i32 5
  %z7 = insertelement <16 x i16> %z5, i16 0, i32 7
  %z9 = insertelement <16 x i16> %z7, i16 0, i32 9
  %z11 = insertelement <16 x i16> %z9, i16 0, i32 11
  %z13 = insertelement <16 x i16> %z11, i16 0, i32 13
  %z15 = insertelement <16 x i16> %z13, i16 0, i32 15
  %cleared = bitcast <16 x i16> %z15 to <8 x i32>
  ret <8 x i32> %cleared
}
737
; Clear the upper 8 bits of every i16 lane by viewing the input as
; <16 x i8> and writing zero into the odd lanes (the high bytes on
; little-endian x86_64).
define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  ; View each i16 as a pair of bytes.
  %bytes = bitcast <8 x i16> %0 to <16 x i8>
  ; Zero the odd (high-byte) lanes; the value suffix is the lane index.
  %z1 = insertelement <16 x i8> %bytes, i8 0, i32 1
  %z3 = insertelement <16 x i8> %z1, i8 0, i32 3
  %z5 = insertelement <16 x i8> %z3, i8 0, i32 5
  %z7 = insertelement <16 x i8> %z5, i8 0, i32 7
  %z9 = insertelement <16 x i8> %z7, i8 0, i32 9
  %z11 = insertelement <16 x i8> %z9, i8 0, i32 11
  %z13 = insertelement <16 x i8> %z11, i8 0, i32 13
  %z15 = insertelement <16 x i8> %z13, i8 0, i32 15
  %cleared = bitcast <16 x i8> %z15 to <8 x i16>
  ret <8 x i16> %cleared
}
760
; 256-bit variant: clear the upper 8 bits of every i16 lane by viewing the
; input as <32 x i8> and writing zero into the odd lanes (the high bytes on
; little-endian x86_64).
define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper16xi16b:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper16xi16b:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX2-NEXT:    retq
  ; View each i16 as a pair of bytes.
  %bytes = bitcast <16 x i16> %0 to <32 x i8>
  ; Zero the odd (high-byte) lanes; the value suffix is the lane index.
  %z1 = insertelement <32 x i8> %bytes, i8 0, i32 1
  %z3 = insertelement <32 x i8> %z1, i8 0, i32 3
  %z5 = insertelement <32 x i8> %z3, i8 0, i32 5
  %z7 = insertelement <32 x i8> %z5, i8 0, i32 7
  %z9 = insertelement <32 x i8> %z7, i8 0, i32 9
  %z11 = insertelement <32 x i8> %z9, i8 0, i32 11
  %z13 = insertelement <32 x i8> %z11, i8 0, i32 13
  %z15 = insertelement <32 x i8> %z13, i8 0, i32 15
  %z17 = insertelement <32 x i8> %z15, i8 0, i32 17
  %z19 = insertelement <32 x i8> %z17, i8 0, i32 19
  %z21 = insertelement <32 x i8> %z19, i8 0, i32 21
  %z23 = insertelement <32 x i8> %z21, i8 0, i32 23
  %z25 = insertelement <32 x i8> %z23, i8 0, i32 25
  %z27 = insertelement <32 x i8> %z25, i8 0, i32 27
  %z29 = insertelement <32 x i8> %z27, i8 0, i32 29
  %z31 = insertelement <32 x i8> %z29, i8 0, i32 31
  %cleared = bitcast <32 x i8> %z31 to <16 x i16>
  ret <16 x i16> %cleared
}
808
; Clear the upper nibble of every byte by viewing the input as <32 x i4> and
; writing zero into the odd lanes. The expected output below is scalarized:
; each byte is extracted, masked with 15 and stored back through the stack.
define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8b:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    movq %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %r8
; SSE-NEXT:    movq %rcx, %r9
; SSE-NEXT:    movq %rcx, %r10
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %xmm1, %rcx
; SSE-NEXT:    shrq $56, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r11
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r14
; SSE-NEXT:    shrq $40, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    shrq $24, %r10
; SSE-NEXT:    andb $15, %r10b
; SSE-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    shrq $16, %r9
; SSE-NEXT:    andb $15, %r9b
; SSE-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    andb $15, %r8b
; SSE-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rbx
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $56, %rbx
; SSE-NEXT:    andb $15, %bl
; SSE-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $40, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $24, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $16, %r14
; SSE-NEXT:    andb $15, %r14b
; SSE-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $8, %r11
; SSE-NEXT:    andb $15, %r11b
; SSE-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX:       # BB#0:
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %r15
; AVX-NEXT:    pushq %r14
; AVX-NEXT:    pushq %r13
; AVX-NEXT:    pushq %r12
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movq %rcx, %r8
; AVX-NEXT:    movq %rcx, %r9
; AVX-NEXT:    movq %rcx, %r10
; AVX-NEXT:    movq %rcx, %r11
; AVX-NEXT:    movq %rcx, %r14
; AVX-NEXT:    movq %rcx, %r15
; AVX-NEXT:    movq %rdx, %r12
; AVX-NEXT:    movq %rdx, %r13
; AVX-NEXT:    movq %rdx, %rdi
; AVX-NEXT:    movq %rdx, %rax
; AVX-NEXT:    movq %rdx, %rsi
; AVX-NEXT:    movq %rdx, %rbx
; AVX-NEXT:    movq %rdx, %rbp
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    andb $15, %cl
; AVX-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rbp
; AVX-NEXT:    andb $15, %bpl
; AVX-NEXT:    movb %bpl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %rbx
; AVX-NEXT:    andb $15, %bl
; AVX-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %rsi
; AVX-NEXT:    andb $15, %sil
; AVX-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %rax
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %rdi
; AVX-NEXT:    andb $15, %dil
; AVX-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r13
; AVX-NEXT:    andb $15, %r13b
; AVX-NEXT:    movb %r13b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r12
; AVX-NEXT:    andb $15, %r12b
; AVX-NEXT:    movb %r12b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $56, %rdx
; AVX-NEXT:    andb $15, %dl
; AVX-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $48, %r15
; AVX-NEXT:    andb $15, %r15b
; AVX-NEXT:    movb %r15b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $40, %r14
; AVX-NEXT:    andb $15, %r14b
; AVX-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $32, %r11
; AVX-NEXT:    andb $15, %r11b
; AVX-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $24, %r10
; AVX-NEXT:    andb $15, %r10b
; AVX-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $16, %r9
; AVX-NEXT:    andb $15, %r9b
; AVX-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shrq $8, %r8
; AVX-NEXT:    andb $15, %r8b
; AVX-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %r12
; AVX-NEXT:    popq %r13
; AVX-NEXT:    popq %r14
; AVX-NEXT:    popq %r15
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
  ; View each byte as a pair of i4 nibbles.
  %nibbles = bitcast <16 x i8> %0 to <32 x i4>
  ; Zero the odd (high-nibble) lanes; the value suffix is the lane index.
  %z1 = insertelement <32 x i4> %nibbles, i4 0, i32 1
  %z3 = insertelement <32 x i4> %z1, i4 0, i32 3
  %z5 = insertelement <32 x i4> %z3, i4 0, i32 5
  %z7 = insertelement <32 x i4> %z5, i4 0, i32 7
  %z9 = insertelement <32 x i4> %z7, i4 0, i32 9
  %z11 = insertelement <32 x i4> %z9, i4 0, i32 11
  %z13 = insertelement <32 x i4> %z11, i4 0, i32 13
  %z15 = insertelement <32 x i4> %z13, i4 0, i32 15
  %z17 = insertelement <32 x i4> %z15, i4 0, i32 17
  %z19 = insertelement <32 x i4> %z17, i4 0, i32 19
  %z21 = insertelement <32 x i4> %z19, i4 0, i32 21
  %z23 = insertelement <32 x i4> %z21, i4 0, i32 23
  %z25 = insertelement <32 x i4> %z23, i4 0, i32 25
  %z27 = insertelement <32 x i4> %z25, i4 0, i32 27
  %z29 = insertelement <32 x i4> %z27, i4 0, i32 29
  %z31 = insertelement <32 x i4> %z29, i4 0, i32 31
  %cleared = bitcast <32 x i4> %z31 to <16 x i8>
  ret <16 x i8> %cleared
}
986
; 256-bit variant of the nibble-clearing pattern: the input is viewed as
; <64 x i4> and zero is inserted into every odd lane.
;
; FIXME(review): the final bitcast consumes %r15 rather than %r31, so the
; inserts %r16..%r31 are dead and only the low 16 bytes end up masked. The
; autogenerated assertions below encode that behaviour (the baseline x86-64
; run never touches %xmm1). Switching the operand to %r31 requires
; regenerating the assertions with utils/update_llc_test_checks.py.
define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8b:
; SSE:       # BB#0:
; SSE-NEXT:    pushq %r14
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT:    movq %xmm0, %rcx
; SSE-NEXT:    movq %rcx, %r8
; SSE-NEXT:    movq %rcx, %r9
; SSE-NEXT:    movq %rcx, %r10
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %xmm2, %rcx
; SSE-NEXT:    shrq $56, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r11
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %r14
; SSE-NEXT:    shrq $40, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rax
; SSE-NEXT:    shrq $24, %r10
; SSE-NEXT:    andb $15, %r10b
; SSE-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rdi
; SSE-NEXT:    shrq $16, %r9
; SSE-NEXT:    andb $15, %r9b
; SSE-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rsi
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    andb $15, %r8b
; SSE-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, %rbx
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $15, %cl
; SSE-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $56, %rbx
; SSE-NEXT:    andb $15, %bl
; SSE-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $48, %rsi
; SSE-NEXT:    andb $15, %sil
; SSE-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $40, %rdi
; SSE-NEXT:    andb $15, %dil
; SSE-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $32, %rax
; SSE-NEXT:    andb $15, %al
; SSE-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $24, %rdx
; SSE-NEXT:    andb $15, %dl
; SSE-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $16, %r14
; SSE-NEXT:    andb $15, %r14b
; SSE-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    shrq $8, %r11
; SSE-NEXT:    andb $15, %r11b
; SSE-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %r14
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper32xi8b:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    pushq %r15
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %r13
; AVX1-NEXT:    pushq %r12
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    vmovq %xmm0, %rcx
; AVX1-NEXT:    movq %rcx, %r8
; AVX1-NEXT:    movq %rcx, %r9
; AVX1-NEXT:    movq %rcx, %r10
; AVX1-NEXT:    movq %rcx, %r11
; AVX1-NEXT:    movq %rcx, %r14
; AVX1-NEXT:    movq %rcx, %r15
; AVX1-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX1-NEXT:    movq %rdx, %r12
; AVX1-NEXT:    movq %rdx, %r13
; AVX1-NEXT:    movq %rdx, %rbx
; AVX1-NEXT:    movq %rdx, %rax
; AVX1-NEXT:    movq %rdx, %rdi
; AVX1-NEXT:    movq %rdx, %rsi
; AVX1-NEXT:    movq %rdx, %rbp
; AVX1-NEXT:    andb $15, %dl
; AVX1-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movq %rcx, %rdx
; AVX1-NEXT:    andb $15, %cl
; AVX1-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $56, %rbp
; AVX1-NEXT:    andb $15, %bpl
; AVX1-NEXT:    movb %bpl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $48, %rsi
; AVX1-NEXT:    andb $15, %sil
; AVX1-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $40, %rdi
; AVX1-NEXT:    andb $15, %dil
; AVX1-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $32, %rax
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $24, %rbx
; AVX1-NEXT:    andb $15, %bl
; AVX1-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $16, %r13
; AVX1-NEXT:    andb $15, %r13b
; AVX1-NEXT:    movb %r13b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $8, %r12
; AVX1-NEXT:    andb $15, %r12b
; AVX1-NEXT:    movb %r12b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    shrq $8, %r8
; AVX1-NEXT:    shrq $16, %r9
; AVX1-NEXT:    shrq $24, %r10
; AVX1-NEXT:    shrq $32, %r11
; AVX1-NEXT:    shrq $40, %r14
; AVX1-NEXT:    shrq $48, %r15
; AVX1-NEXT:    shrq $56, %rdx
; AVX1-NEXT:    andb $15, %dl
; AVX1-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r15b
; AVX1-NEXT:    movb %r15b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r14b
; AVX1-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r11b
; AVX1-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r10b
; AVX1-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r9b
; AVX1-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    andb $15, %r8b
; AVX1-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    movq %rax, %rdx
; AVX1-NEXT:    movq %rax, %rsi
; AVX1-NEXT:    movq %rax, %rdi
; AVX1-NEXT:    movl %eax, %ebp
; AVX1-NEXT:    movl %eax, %ebx
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT:    shrl $16, %ebx
; AVX1-NEXT:    vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX1-NEXT:    shrl $24, %ebp
; AVX1-NEXT:    vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX1-NEXT:    shrq $32, %rdi
; AVX1-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT:    shrq $40, %rsi
; AVX1-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
; AVX1-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX1-NEXT:    shrq $48, %rdx
; AVX1-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
; AVX1-NEXT:    shrq $56, %rcx
; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $8, %ecx
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $16, %ecx
; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $24, %ecx
; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $32, %rcx
; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $40, %rcx
; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $48, %rcx
; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm2, %rcx
; AVX1-NEXT:    shrq $56, %rax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    vmovd %ecx, %xmm1
; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $16, %eax
; AVX1-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $24, %eax
; AVX1-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $32, %rax
; AVX1-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $40, %rax
; AVX1-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movq %rcx, %rax
; AVX1-NEXT:    shrq $48, %rax
; AVX1-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
; AVX1-NEXT:    shrq $56, %rcx
; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $8, %ecx
; AVX1-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $16, %ecx
; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrl $24, %ecx
; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $32, %rcx
; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $40, %rcx
; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    movq %rax, %rcx
; AVX1-NEXT:    shrq $48, %rcx
; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX1-NEXT:    shrq $56, %rax
; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r12
; AVX1-NEXT:    popq %r13
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    popq %r15
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper32xi8b:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    pushq %r15
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %r13
; AVX2-NEXT:    pushq %r12
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    vmovq %xmm0, %rcx
; AVX2-NEXT:    movq %rcx, %r8
; AVX2-NEXT:    movq %rcx, %r9
; AVX2-NEXT:    movq %rcx, %r10
; AVX2-NEXT:    movq %rcx, %r11
; AVX2-NEXT:    movq %rcx, %r14
; AVX2-NEXT:    movq %rcx, %r15
; AVX2-NEXT:    vpextrq $1, %xmm0, %rdx
; AVX2-NEXT:    movq %rdx, %r12
; AVX2-NEXT:    movq %rdx, %r13
; AVX2-NEXT:    movq %rdx, %rbx
; AVX2-NEXT:    movq %rdx, %rax
; AVX2-NEXT:    movq %rdx, %rdi
; AVX2-NEXT:    movq %rdx, %rsi
; AVX2-NEXT:    movq %rdx, %rbp
; AVX2-NEXT:    andb $15, %dl
; AVX2-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rcx, %rdx
; AVX2-NEXT:    andb $15, %cl
; AVX2-NEXT:    movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $56, %rbp
; AVX2-NEXT:    andb $15, %bpl
; AVX2-NEXT:    movb %bpl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $48, %rsi
; AVX2-NEXT:    andb $15, %sil
; AVX2-NEXT:    movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $40, %rdi
; AVX2-NEXT:    andb $15, %dil
; AVX2-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $32, %rax
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $24, %rbx
; AVX2-NEXT:    andb $15, %bl
; AVX2-NEXT:    movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $16, %r13
; AVX2-NEXT:    andb $15, %r13b
; AVX2-NEXT:    movb %r13b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $8, %r12
; AVX2-NEXT:    andb $15, %r12b
; AVX2-NEXT:    movb %r12b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    shrq $8, %r8
; AVX2-NEXT:    shrq $16, %r9
; AVX2-NEXT:    shrq $24, %r10
; AVX2-NEXT:    shrq $32, %r11
; AVX2-NEXT:    shrq $40, %r14
; AVX2-NEXT:    shrq $48, %r15
; AVX2-NEXT:    shrq $56, %rdx
; AVX2-NEXT:    andb $15, %dl
; AVX2-NEXT:    movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r15b
; AVX2-NEXT:    movb %r15b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r14b
; AVX2-NEXT:    movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r11b
; AVX2-NEXT:    movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r10b
; AVX2-NEXT:    movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r9b
; AVX2-NEXT:    movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $15, %r8b
; AVX2-NEXT:    movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    movq %rax, %rdx
; AVX2-NEXT:    movq %rax, %rsi
; AVX2-NEXT:    movq %rax, %rdi
; AVX2-NEXT:    movl %eax, %ebp
; AVX2-NEXT:    movl %eax, %ebx
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT:    shrl $16, %ebx
; AVX2-NEXT:    vpinsrb $2, %ebx, %xmm1, %xmm1
; AVX2-NEXT:    shrl $24, %ebp
; AVX2-NEXT:    vpinsrb $3, %ebp, %xmm1, %xmm1
; AVX2-NEXT:    shrq $32, %rdi
; AVX2-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT:    shrq $40, %rsi
; AVX2-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
; AVX2-NEXT:    movb $0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX2-NEXT:    shrq $48, %rdx
; AVX2-NEXT:    vpinsrb $6, %edx, %xmm1, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
; AVX2-NEXT:    shrq $56, %rcx
; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $8, %ecx
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $24, %ecx
; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $32, %rcx
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $40, %rcx
; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $48, %rcx
; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX2-NEXT:    vmovq %xmm2, %rcx
; AVX2-NEXT:    shrq $56, %rax
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    vmovd %ecx, %xmm1
; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $24, %eax
; AVX2-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $32, %rax
; AVX2-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $40, %rax
; AVX2-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movq %rcx, %rax
; AVX2-NEXT:    shrq $48, %rax
; AVX2-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
; AVX2-NEXT:    shrq $56, %rcx
; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $8, %ecx
; AVX2-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $16, %ecx
; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrl $24, %ecx
; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $32, %rcx
; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $40, %rcx
; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    movq %rax, %rcx
; AVX2-NEXT:    shrq $48, %rcx
; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX2-NEXT:    shrq $56, %rax
; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r12
; AVX2-NEXT:    popq %r13
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    popq %r15
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  ; View each byte as a pair of i4 nibbles.
  %x4  = bitcast <32 x i8> %0 to <64 x i4>
  ; Zero the odd (high-nibble) lanes 1..31, i.e. bytes 0..15.
  %r0  = insertelement <64 x i4> %x4,  i4 zeroinitializer, i32 1
  %r1  = insertelement <64 x i4> %r0,  i4 zeroinitializer, i32 3
  %r2  = insertelement <64 x i4> %r1,  i4 zeroinitializer, i32 5
  %r3  = insertelement <64 x i4> %r2,  i4 zeroinitializer, i32 7
  %r4  = insertelement <64 x i4> %r3,  i4 zeroinitializer, i32 9
  %r5  = insertelement <64 x i4> %r4,  i4 zeroinitializer, i32 11
  %r6  = insertelement <64 x i4> %r5,  i4 zeroinitializer, i32 13
  %r7  = insertelement <64 x i4> %r6,  i4 zeroinitializer, i32 15
  %r8  = insertelement <64 x i4> %r7,  i4 zeroinitializer, i32 17
  %r9  = insertelement <64 x i4> %r8,  i4 zeroinitializer, i32 19
  %r10 = insertelement <64 x i4> %r9,  i4 zeroinitializer, i32 21
  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
  ; Odd lanes 33..63 (bytes 16..31). NOTE(review): these values are never
  ; used because the bitcast below reads %r15.
  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  ; FIXME(review): operand is %r15, presumably meant to be %r31 (see the
  ; note above the function).
  %r = bitcast <64 x i4> %r15 to <32 x i8>
  ret <32 x i8> %r
}
1439
; Clear the upper 32 bits of each i64 lane of a <2 x i64> with one vector
; 'and' against a constant mask (4294967295 == 0xFFFFFFFF in every lane).
; @_clearupper2xi64a at the top of this file spells the same operation out as
; extractelement / trunc / zext / insertelement.
;
; What the autogenerated check lines below expect
;   - SSE run   a single andps whose mask is a RIP-relative memory operand
;   - AVX1 run  vpxor to zero %xmm1, then vpblendw taking words 2-3 and 6-7
;               from the zero register
;   - AVX2 run  vpxor to zero %xmm1, then vpblendd taking dwords 1 and 3
;               from the zero register
define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}
1460
; Clear the upper 32 bits of each i64 lane of a <4 x i64> with one vector
; 'and' against a constant mask (4294967295 == 0xFFFFFFFF in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run   the mask is loaded once into %xmm2 (printed as
;               [4294967295,0,4294967295,0]) and applied with andps to %xmm0
;               and to %xmm1 (presumably the two 128-bit halves of the
;               argument)
;   - AVX1 run  vxorps to zero %ymm1, then vblendps taking the odd 32-bit
;               elements from the zero register
;   - AVX2 run  vpxor to zero %ymm1, then vpblendd taking the odd 32-bit
;               elements from the zero register
define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
  ret <4 x i64> %r
}
1483
; Clear the upper 16 bits of each i32 lane of a <4 x i32> with one vector
; 'and' against a constant mask (65535 == 0xFFFF in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run    a single andps whose mask is a RIP-relative memory operand
;   - AVX runs   (AVX1 and AVX2 share the AVX prefix here) vpxor to zero
;                %xmm1, then vpblendw taking the odd 16-bit words from the
;                zero register
define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}
1498
; Clear the upper 16 bits of each i32 lane of an <8 x i32> with one vector
; 'and' against a constant mask (65535 == 0xFFFF in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run   the mask is loaded once into %xmm2 (printed as
;               [65535,0,65535,0,65535,0,65535,0]) and applied with andps to
;               %xmm0 and to %xmm1
;   - AVX1 run  a single 256-bit vandps with a RIP-relative memory operand
;   - AVX2 run  vpxor to zero %ymm1, then a 256-bit vpblendw taking the odd
;               16-bit words from the zero register
define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <8 x i32> %r
}
1520
; Clear the upper 8 bits of each i16 lane of an <8 x i16> with one vector
; 'and' against a constant mask (255 == 0xFF in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run    a single andps whose mask is a RIP-relative memory operand
;   - AVX runs   (AVX1 and AVX2 share the AVX prefix here) a single vandps
;                whose mask is a RIP-relative memory operand
define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}
1534
; Clear the upper 8 bits of each i16 lane of a <16 x i16> with one vector
; 'and' against a constant mask (255 == 0xFF in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run    the mask is loaded once into %xmm2 (printed as sixteen
;                alternating 255,0 entries) and applied with andps to %xmm0
;                and to %xmm1
;   - AVX runs   (AVX1 and AVX2 share the AVX prefix here) a single 256-bit
;                vandps whose mask is a RIP-relative memory operand
define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <16 x i16> %r
}
1550
; Clear the upper 4 bits of each i8 lane of a <16 x i8> with one vector
; 'and' against a constant mask (15 == 0x0F in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run    a single andps whose mask is a RIP-relative memory operand
;   - AVX runs   (AVX1 and AVX2 share the AVX prefix here) a single vandps
;                whose mask is a RIP-relative memory operand
define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}
1564
; Clear the upper 4 bits of each i8 lane of a <32 x i8> with one vector
; 'and' against a constant mask (15 == 0x0F in every lane).
;
; What the autogenerated check lines below expect
;   - SSE run    the mask is loaded once into %xmm2 (sixteen bytes of 15) and
;                applied with andps to %xmm0 and to %xmm1
;   - AVX runs   (AVX1 and AVX2 share the AVX prefix here) a single 256-bit
;                vandps whose mask is a RIP-relative memory operand
define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper32xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <32 x i8> %r
}
1580