1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6
7;
8; PR6455 'Clear Upper Bits' Patterns
9;
10
11define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
12; SSE2-LABEL: _clearupper2xi64a:
13; SSE2:       # %bb.0:
14; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
15; SSE2-NEXT:    retq
16;
17; SSE42-LABEL: _clearupper2xi64a:
18; SSE42:       # %bb.0:
19; SSE42-NEXT:    xorps %xmm1, %xmm1
20; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
21; SSE42-NEXT:    retq
22;
23; AVX-LABEL: _clearupper2xi64a:
24; AVX:       # %bb.0:
25; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
26; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
27; AVX-NEXT:    retq
28  %x0 = extractelement <2 x i64> %0, i32 0
29  %x1 = extractelement <2 x i64> %0, i32 1
30  %trunc0 = trunc i64 %x0 to i32
31  %trunc1 = trunc i64 %x1 to i32
32  %ext0 = zext i32 %trunc0 to i64
33  %ext1 = zext i32 %trunc1 to i64
34  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
35  %v1 = insertelement <2 x i64> %v0,   i64 %ext1, i32 1
36  ret <2 x i64> %v1
37}
38
39define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
40; SSE2-LABEL: _clearupper4xi64a:
41; SSE2:       # %bb.0:
42; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,4294967295]
43; SSE2-NEXT:    andps %xmm2, %xmm0
44; SSE2-NEXT:    andps %xmm2, %xmm1
45; SSE2-NEXT:    retq
46;
47; SSE42-LABEL: _clearupper4xi64a:
48; SSE42:       # %bb.0:
49; SSE42-NEXT:    xorps %xmm2, %xmm2
50; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
51; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
52; SSE42-NEXT:    retq
53;
54; AVX-LABEL: _clearupper4xi64a:
55; AVX:       # %bb.0:
56; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
57; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
58; AVX-NEXT:    retq
59  %x0 = extractelement <4 x i64> %0, i32 0
60  %x1 = extractelement <4 x i64> %0, i32 1
61  %x2 = extractelement <4 x i64> %0, i32 2
62  %x3 = extractelement <4 x i64> %0, i32 3
63  %trunc0 = trunc i64 %x0 to i32
64  %trunc1 = trunc i64 %x1 to i32
65  %trunc2 = trunc i64 %x2 to i32
66  %trunc3 = trunc i64 %x3 to i32
67  %ext0 = zext i32 %trunc0 to i64
68  %ext1 = zext i32 %trunc1 to i64
69  %ext2 = zext i32 %trunc2 to i64
70  %ext3 = zext i32 %trunc3 to i64
71  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
72  %v1 = insertelement <4 x i64> %v0,   i64 %ext1, i32 1
73  %v2 = insertelement <4 x i64> %v1,   i64 %ext2, i32 2
74  %v3 = insertelement <4 x i64> %v2,   i64 %ext3, i32 3
75  ret <4 x i64> %v3
76}
77
78define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
79; SSE2-LABEL: _clearupper4xi32a:
80; SSE2:       # %bb.0:
81; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
82; SSE2-NEXT:    retq
83;
84; SSE42-LABEL: _clearupper4xi32a:
85; SSE42:       # %bb.0:
86; SSE42-NEXT:    pxor %xmm1, %xmm1
87; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
88; SSE42-NEXT:    retq
89;
90; AVX-LABEL: _clearupper4xi32a:
91; AVX:       # %bb.0:
92; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
93; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
94; AVX-NEXT:    retq
95  %x0 = extractelement <4 x i32> %0, i32 0
96  %x1 = extractelement <4 x i32> %0, i32 1
97  %x2 = extractelement <4 x i32> %0, i32 2
98  %x3 = extractelement <4 x i32> %0, i32 3
99  %trunc0 = trunc i32 %x0 to i16
100  %trunc1 = trunc i32 %x1 to i16
101  %trunc2 = trunc i32 %x2 to i16
102  %trunc3 = trunc i32 %x3 to i16
103  %ext0 = zext i16 %trunc0 to i32
104  %ext1 = zext i16 %trunc1 to i32
105  %ext2 = zext i16 %trunc2 to i32
106  %ext3 = zext i16 %trunc3 to i32
107  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
108  %v1 = insertelement <4 x i32> %v0,   i32 %ext1, i32 1
109  %v2 = insertelement <4 x i32> %v1,   i32 %ext2, i32 2
110  %v3 = insertelement <4 x i32> %v2,   i32 %ext3, i32 3
111  ret <4 x i32> %v3
112}
113
114define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
115; SSE2-LABEL: _clearupper8xi32a:
116; SSE2:       # %bb.0:
117; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
118; SSE2-NEXT:    andps %xmm2, %xmm0
119; SSE2-NEXT:    andps %xmm2, %xmm1
120; SSE2-NEXT:    retq
121;
122; SSE42-LABEL: _clearupper8xi32a:
123; SSE42:       # %bb.0:
124; SSE42-NEXT:    pxor %xmm2, %xmm2
125; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
126; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
127; SSE42-NEXT:    retq
128;
129; AVX1-LABEL: _clearupper8xi32a:
130; AVX1:       # %bb.0:
131; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
132; AVX1-NEXT:    retq
133;
134; AVX2-LABEL: _clearupper8xi32a:
135; AVX2:       # %bb.0:
136; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
137; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
138; AVX2-NEXT:    retq
139  %x0 = extractelement <8 x i32> %0, i32 0
140  %x1 = extractelement <8 x i32> %0, i32 1
141  %x2 = extractelement <8 x i32> %0, i32 2
142  %x3 = extractelement <8 x i32> %0, i32 3
143  %x4 = extractelement <8 x i32> %0, i32 4
144  %x5 = extractelement <8 x i32> %0, i32 5
145  %x6 = extractelement <8 x i32> %0, i32 6
146  %x7 = extractelement <8 x i32> %0, i32 7
147  %trunc0 = trunc i32 %x0 to i16
148  %trunc1 = trunc i32 %x1 to i16
149  %trunc2 = trunc i32 %x2 to i16
150  %trunc3 = trunc i32 %x3 to i16
151  %trunc4 = trunc i32 %x4 to i16
152  %trunc5 = trunc i32 %x5 to i16
153  %trunc6 = trunc i32 %x6 to i16
154  %trunc7 = trunc i32 %x7 to i16
155  %ext0 = zext i16 %trunc0 to i32
156  %ext1 = zext i16 %trunc1 to i32
157  %ext2 = zext i16 %trunc2 to i32
158  %ext3 = zext i16 %trunc3 to i32
159  %ext4 = zext i16 %trunc4 to i32
160  %ext5 = zext i16 %trunc5 to i32
161  %ext6 = zext i16 %trunc6 to i32
162  %ext7 = zext i16 %trunc7 to i32
163  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
164  %v1 = insertelement <8 x i32> %v0,   i32 %ext1, i32 1
165  %v2 = insertelement <8 x i32> %v1,   i32 %ext2, i32 2
166  %v3 = insertelement <8 x i32> %v2,   i32 %ext3, i32 3
167  %v4 = insertelement <8 x i32> %v3,   i32 %ext4, i32 4
168  %v5 = insertelement <8 x i32> %v4,   i32 %ext5, i32 5
169  %v6 = insertelement <8 x i32> %v5,   i32 %ext6, i32 6
170  %v7 = insertelement <8 x i32> %v6,   i32 %ext7, i32 7
171  ret <8 x i32> %v7
172}
173
174define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
175; SSE-LABEL: _clearupper8xi16a:
176; SSE:       # %bb.0:
177; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
178; SSE-NEXT:    retq
179;
180; AVX-LABEL: _clearupper8xi16a:
181; AVX:       # %bb.0:
182; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
183; AVX-NEXT:    retq
184  %x0 = extractelement <8 x i16> %0, i32 0
185  %x1 = extractelement <8 x i16> %0, i32 1
186  %x2 = extractelement <8 x i16> %0, i32 2
187  %x3 = extractelement <8 x i16> %0, i32 3
188  %x4 = extractelement <8 x i16> %0, i32 4
189  %x5 = extractelement <8 x i16> %0, i32 5
190  %x6 = extractelement <8 x i16> %0, i32 6
191  %x7 = extractelement <8 x i16> %0, i32 7
192  %trunc0 = trunc i16 %x0 to i8
193  %trunc1 = trunc i16 %x1 to i8
194  %trunc2 = trunc i16 %x2 to i8
195  %trunc3 = trunc i16 %x3 to i8
196  %trunc4 = trunc i16 %x4 to i8
197  %trunc5 = trunc i16 %x5 to i8
198  %trunc6 = trunc i16 %x6 to i8
199  %trunc7 = trunc i16 %x7 to i8
200  %ext0 = zext i8 %trunc0 to i16
201  %ext1 = zext i8 %trunc1 to i16
202  %ext2 = zext i8 %trunc2 to i16
203  %ext3 = zext i8 %trunc3 to i16
204  %ext4 = zext i8 %trunc4 to i16
205  %ext5 = zext i8 %trunc5 to i16
206  %ext6 = zext i8 %trunc6 to i16
207  %ext7 = zext i8 %trunc7 to i16
208  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
209  %v1 = insertelement <8 x i16> %v0,   i16 %ext1, i32 1
210  %v2 = insertelement <8 x i16> %v1,   i16 %ext2, i32 2
211  %v3 = insertelement <8 x i16> %v2,   i16 %ext3, i32 3
212  %v4 = insertelement <8 x i16> %v3,   i16 %ext4, i32 4
213  %v5 = insertelement <8 x i16> %v4,   i16 %ext5, i32 5
214  %v6 = insertelement <8 x i16> %v5,   i16 %ext6, i32 6
215  %v7 = insertelement <8 x i16> %v6,   i16 %ext7, i32 7
216  ret <8 x i16> %v7
217}
218
219define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
220; SSE-LABEL: _clearupper16xi16a:
221; SSE:       # %bb.0:
222; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
223; SSE-NEXT:    andps %xmm2, %xmm0
224; SSE-NEXT:    andps %xmm2, %xmm1
225; SSE-NEXT:    retq
226;
227; AVX-LABEL: _clearupper16xi16a:
228; AVX:       # %bb.0:
229; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
230; AVX-NEXT:    retq
231  %x0  = extractelement <16 x i16> %0, i32 0
232  %x1  = extractelement <16 x i16> %0, i32 1
233  %x2  = extractelement <16 x i16> %0, i32 2
234  %x3  = extractelement <16 x i16> %0, i32 3
235  %x4  = extractelement <16 x i16> %0, i32 4
236  %x5  = extractelement <16 x i16> %0, i32 5
237  %x6  = extractelement <16 x i16> %0, i32 6
238  %x7  = extractelement <16 x i16> %0, i32 7
239  %x8  = extractelement <16 x i16> %0, i32 8
240  %x9  = extractelement <16 x i16> %0, i32 9
241  %x10 = extractelement <16 x i16> %0, i32 10
242  %x11 = extractelement <16 x i16> %0, i32 11
243  %x12 = extractelement <16 x i16> %0, i32 12
244  %x13 = extractelement <16 x i16> %0, i32 13
245  %x14 = extractelement <16 x i16> %0, i32 14
246  %x15 = extractelement <16 x i16> %0, i32 15
247  %trunc0  = trunc i16 %x0  to i8
248  %trunc1  = trunc i16 %x1  to i8
249  %trunc2  = trunc i16 %x2  to i8
250  %trunc3  = trunc i16 %x3  to i8
251  %trunc4  = trunc i16 %x4  to i8
252  %trunc5  = trunc i16 %x5  to i8
253  %trunc6  = trunc i16 %x6  to i8
254  %trunc7  = trunc i16 %x7  to i8
255  %trunc8  = trunc i16 %x8  to i8
256  %trunc9  = trunc i16 %x9  to i8
257  %trunc10 = trunc i16 %x10 to i8
258  %trunc11 = trunc i16 %x11 to i8
259  %trunc12 = trunc i16 %x12 to i8
260  %trunc13 = trunc i16 %x13 to i8
261  %trunc14 = trunc i16 %x14 to i8
262  %trunc15 = trunc i16 %x15 to i8
263  %ext0  = zext i8 %trunc0  to i16
264  %ext1  = zext i8 %trunc1  to i16
265  %ext2  = zext i8 %trunc2  to i16
266  %ext3  = zext i8 %trunc3  to i16
267  %ext4  = zext i8 %trunc4  to i16
268  %ext5  = zext i8 %trunc5  to i16
269  %ext6  = zext i8 %trunc6  to i16
270  %ext7  = zext i8 %trunc7  to i16
271  %ext8  = zext i8 %trunc8  to i16
272  %ext9  = zext i8 %trunc9  to i16
273  %ext10 = zext i8 %trunc10 to i16
274  %ext11 = zext i8 %trunc11 to i16
275  %ext12 = zext i8 %trunc12 to i16
276  %ext13 = zext i8 %trunc13 to i16
277  %ext14 = zext i8 %trunc14 to i16
278  %ext15 = zext i8 %trunc15 to i16
279  %v0  = insertelement <16 x i16> undef, i16 %ext0,  i32 0
280  %v1  = insertelement <16 x i16> %v0,   i16 %ext1,  i32 1
281  %v2  = insertelement <16 x i16> %v1,   i16 %ext2,  i32 2
282  %v3  = insertelement <16 x i16> %v2,   i16 %ext3,  i32 3
283  %v4  = insertelement <16 x i16> %v3,   i16 %ext4,  i32 4
284  %v5  = insertelement <16 x i16> %v4,   i16 %ext5,  i32 5
285  %v6  = insertelement <16 x i16> %v5,   i16 %ext6,  i32 6
286  %v7  = insertelement <16 x i16> %v6,   i16 %ext7,  i32 7
287  %v8  = insertelement <16 x i16> %v7,   i16 %ext8,  i32 8
288  %v9  = insertelement <16 x i16> %v8,   i16 %ext9,  i32 9
289  %v10 = insertelement <16 x i16> %v9,   i16 %ext10, i32 10
290  %v11 = insertelement <16 x i16> %v10,  i16 %ext11, i32 11
291  %v12 = insertelement <16 x i16> %v11,  i16 %ext12, i32 12
292  %v13 = insertelement <16 x i16> %v12,  i16 %ext13, i32 13
293  %v14 = insertelement <16 x i16> %v13,  i16 %ext14, i32 14
294  %v15 = insertelement <16 x i16> %v14,  i16 %ext15, i32 15
295  ret <16 x i16> %v15
296}
297
298define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
299; SSE2-LABEL: _clearupper16xi8a:
300; SSE2:       # %bb.0:
301; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
302; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
303; SSE2-NEXT:    movd %eax, %xmm0
304; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
305; SSE2-NEXT:    movd %eax, %xmm1
306; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
307; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
308; SSE2-NEXT:    movd %eax, %xmm0
309; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
310; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
311; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
312; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
313; SSE2-NEXT:    movd %eax, %xmm0
314; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
315; SSE2-NEXT:    movd %eax, %xmm3
316; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
317; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
318; SSE2-NEXT:    movd %eax, %xmm0
319; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
320; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
321; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
322; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
323; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
324; SSE2-NEXT:    movd %eax, %xmm0
325; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
326; SSE2-NEXT:    movd %eax, %xmm2
327; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
328; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
329; SSE2-NEXT:    movd %eax, %xmm0
330; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
331; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
332; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
333; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
334; SSE2-NEXT:    movd %eax, %xmm0
335; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
336; SSE2-NEXT:    movd %eax, %xmm2
337; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
338; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
339; SSE2-NEXT:    movd %eax, %xmm4
340; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
341; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
342; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
343; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
344; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
345; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
346; SSE2-NEXT:    retq
347;
348; SSE42-LABEL: _clearupper16xi8a:
349; SSE42:       # %bb.0:
350; SSE42-NEXT:    andps {{.*}}(%rip), %xmm0
351; SSE42-NEXT:    retq
352;
353; AVX-LABEL: _clearupper16xi8a:
354; AVX:       # %bb.0:
355; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
356; AVX-NEXT:    retq
357  %x0  = extractelement <16 x i8> %0, i32 0
358  %x1  = extractelement <16 x i8> %0, i32 1
359  %x2  = extractelement <16 x i8> %0, i32 2
360  %x3  = extractelement <16 x i8> %0, i32 3
361  %x4  = extractelement <16 x i8> %0, i32 4
362  %x5  = extractelement <16 x i8> %0, i32 5
363  %x6  = extractelement <16 x i8> %0, i32 6
364  %x7  = extractelement <16 x i8> %0, i32 7
365  %x8  = extractelement <16 x i8> %0, i32 8
366  %x9  = extractelement <16 x i8> %0, i32 9
367  %x10 = extractelement <16 x i8> %0, i32 10
368  %x11 = extractelement <16 x i8> %0, i32 11
369  %x12 = extractelement <16 x i8> %0, i32 12
370  %x13 = extractelement <16 x i8> %0, i32 13
371  %x14 = extractelement <16 x i8> %0, i32 14
372  %x15 = extractelement <16 x i8> %0, i32 15
373  %trunc0  = trunc i8 %x0  to i4
374  %trunc1  = trunc i8 %x1  to i4
375  %trunc2  = trunc i8 %x2  to i4
376  %trunc3  = trunc i8 %x3  to i4
377  %trunc4  = trunc i8 %x4  to i4
378  %trunc5  = trunc i8 %x5  to i4
379  %trunc6  = trunc i8 %x6  to i4
380  %trunc7  = trunc i8 %x7  to i4
381  %trunc8  = trunc i8 %x8  to i4
382  %trunc9  = trunc i8 %x9  to i4
383  %trunc10 = trunc i8 %x10 to i4
384  %trunc11 = trunc i8 %x11 to i4
385  %trunc12 = trunc i8 %x12 to i4
386  %trunc13 = trunc i8 %x13 to i4
387  %trunc14 = trunc i8 %x14 to i4
388  %trunc15 = trunc i8 %x15 to i4
389  %ext0  = zext i4 %trunc0  to i8
390  %ext1  = zext i4 %trunc1  to i8
391  %ext2  = zext i4 %trunc2  to i8
392  %ext3  = zext i4 %trunc3  to i8
393  %ext4  = zext i4 %trunc4  to i8
394  %ext5  = zext i4 %trunc5  to i8
395  %ext6  = zext i4 %trunc6  to i8
396  %ext7  = zext i4 %trunc7  to i8
397  %ext8  = zext i4 %trunc8  to i8
398  %ext9  = zext i4 %trunc9  to i8
399  %ext10 = zext i4 %trunc10 to i8
400  %ext11 = zext i4 %trunc11 to i8
401  %ext12 = zext i4 %trunc12 to i8
402  %ext13 = zext i4 %trunc13 to i8
403  %ext14 = zext i4 %trunc14 to i8
404  %ext15 = zext i4 %trunc15 to i8
405  %v0  = insertelement <16 x i8> undef, i8 %ext0,  i32 0
406  %v1  = insertelement <16 x i8> %v0,   i8 %ext1,  i32 1
407  %v2  = insertelement <16 x i8> %v1,   i8 %ext2,  i32 2
408  %v3  = insertelement <16 x i8> %v2,   i8 %ext3,  i32 3
409  %v4  = insertelement <16 x i8> %v3,   i8 %ext4,  i32 4
410  %v5  = insertelement <16 x i8> %v4,   i8 %ext5,  i32 5
411  %v6  = insertelement <16 x i8> %v5,   i8 %ext6,  i32 6
412  %v7  = insertelement <16 x i8> %v6,   i8 %ext7,  i32 7
413  %v8  = insertelement <16 x i8> %v7,   i8 %ext8,  i32 8
414  %v9  = insertelement <16 x i8> %v8,   i8 %ext9,  i32 9
415  %v10 = insertelement <16 x i8> %v9,   i8 %ext10, i32 10
416  %v11 = insertelement <16 x i8> %v10,  i8 %ext11, i32 11
417  %v12 = insertelement <16 x i8> %v11,  i8 %ext12, i32 12
418  %v13 = insertelement <16 x i8> %v12,  i8 %ext13, i32 13
419  %v14 = insertelement <16 x i8> %v13,  i8 %ext14, i32 14
420  %v15 = insertelement <16 x i8> %v14,  i8 %ext15, i32 15
421  ret <16 x i8> %v15
422}
423
424define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
425; SSE2-LABEL: _clearupper32xi8a:
426; SSE2:       # %bb.0:
427; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
428; SSE2-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
429; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
430; SSE2-NEXT:    movd %eax, %xmm0
431; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
432; SSE2-NEXT:    movd %eax, %xmm1
433; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
434; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
435; SSE2-NEXT:    movd %eax, %xmm0
436; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
437; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
438; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
439; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
440; SSE2-NEXT:    movd %eax, %xmm0
441; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
442; SSE2-NEXT:    movd %eax, %xmm3
443; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
444; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
445; SSE2-NEXT:    movd %eax, %xmm0
446; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
447; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
448; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
449; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
450; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
451; SSE2-NEXT:    movd %eax, %xmm0
452; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
453; SSE2-NEXT:    movd %eax, %xmm2
454; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
455; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
456; SSE2-NEXT:    movd %eax, %xmm0
457; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
458; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
459; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
460; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
461; SSE2-NEXT:    movd %eax, %xmm0
462; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
463; SSE2-NEXT:    movd %eax, %xmm2
464; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
465; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
466; SSE2-NEXT:    movd %eax, %xmm4
467; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
468; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
469; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
470; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
471; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
472; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
473; SSE2-NEXT:    pand %xmm2, %xmm0
474; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
475; SSE2-NEXT:    movd %eax, %xmm1
476; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
477; SSE2-NEXT:    movd %eax, %xmm3
478; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
479; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
480; SSE2-NEXT:    movd %eax, %xmm1
481; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
482; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
483; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
484; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
485; SSE2-NEXT:    movd %eax, %xmm1
486; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
487; SSE2-NEXT:    movd %eax, %xmm5
488; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
489; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
490; SSE2-NEXT:    movd %eax, %xmm1
491; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
492; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
493; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
494; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
495; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
496; SSE2-NEXT:    movd %eax, %xmm1
497; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
498; SSE2-NEXT:    movd %eax, %xmm4
499; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
500; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
501; SSE2-NEXT:    movd %eax, %xmm1
502; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
503; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
504; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
505; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
506; SSE2-NEXT:    movd %eax, %xmm1
507; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
508; SSE2-NEXT:    movd %eax, %xmm4
509; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
510; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
511; SSE2-NEXT:    movd %eax, %xmm6
512; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
513; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
514; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
515; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
516; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
517; SSE2-NEXT:    pand %xmm2, %xmm1
518; SSE2-NEXT:    retq
519;
520; SSE42-LABEL: _clearupper32xi8a:
521; SSE42:       # %bb.0:
522; SSE42-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
523; SSE42-NEXT:    andps %xmm2, %xmm0
524; SSE42-NEXT:    andps %xmm2, %xmm1
525; SSE42-NEXT:    retq
526;
527; AVX-LABEL: _clearupper32xi8a:
528; AVX:       # %bb.0:
529; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
530; AVX-NEXT:    retq
531  %x0  = extractelement <32 x i8> %0, i32 0
532  %x1  = extractelement <32 x i8> %0, i32 1
533  %x2  = extractelement <32 x i8> %0, i32 2
534  %x3  = extractelement <32 x i8> %0, i32 3
535  %x4  = extractelement <32 x i8> %0, i32 4
536  %x5  = extractelement <32 x i8> %0, i32 5
537  %x6  = extractelement <32 x i8> %0, i32 6
538  %x7  = extractelement <32 x i8> %0, i32 7
539  %x8  = extractelement <32 x i8> %0, i32 8
540  %x9  = extractelement <32 x i8> %0, i32 9
541  %x10 = extractelement <32 x i8> %0, i32 10
542  %x11 = extractelement <32 x i8> %0, i32 11
543  %x12 = extractelement <32 x i8> %0, i32 12
544  %x13 = extractelement <32 x i8> %0, i32 13
545  %x14 = extractelement <32 x i8> %0, i32 14
546  %x15 = extractelement <32 x i8> %0, i32 15
547  %x16 = extractelement <32 x i8> %0, i32 16
548  %x17 = extractelement <32 x i8> %0, i32 17
549  %x18 = extractelement <32 x i8> %0, i32 18
550  %x19 = extractelement <32 x i8> %0, i32 19
551  %x20 = extractelement <32 x i8> %0, i32 20
552  %x21 = extractelement <32 x i8> %0, i32 21
553  %x22 = extractelement <32 x i8> %0, i32 22
554  %x23 = extractelement <32 x i8> %0, i32 23
555  %x24 = extractelement <32 x i8> %0, i32 24
556  %x25 = extractelement <32 x i8> %0, i32 25
557  %x26 = extractelement <32 x i8> %0, i32 26
558  %x27 = extractelement <32 x i8> %0, i32 27
559  %x28 = extractelement <32 x i8> %0, i32 28
560  %x29 = extractelement <32 x i8> %0, i32 29
561  %x30 = extractelement <32 x i8> %0, i32 30
562  %x31 = extractelement <32 x i8> %0, i32 31
563  %trunc0  = trunc i8 %x0  to i4
564  %trunc1  = trunc i8 %x1  to i4
565  %trunc2  = trunc i8 %x2  to i4
566  %trunc3  = trunc i8 %x3  to i4
567  %trunc4  = trunc i8 %x4  to i4
568  %trunc5  = trunc i8 %x5  to i4
569  %trunc6  = trunc i8 %x6  to i4
570  %trunc7  = trunc i8 %x7  to i4
571  %trunc8  = trunc i8 %x8  to i4
572  %trunc9  = trunc i8 %x9  to i4
573  %trunc10 = trunc i8 %x10 to i4
574  %trunc11 = trunc i8 %x11 to i4
575  %trunc12 = trunc i8 %x12 to i4
576  %trunc13 = trunc i8 %x13 to i4
577  %trunc14 = trunc i8 %x14 to i4
578  %trunc15 = trunc i8 %x15 to i4
579  %trunc16 = trunc i8 %x16 to i4
580  %trunc17 = trunc i8 %x17 to i4
581  %trunc18 = trunc i8 %x18 to i4
582  %trunc19 = trunc i8 %x19 to i4
583  %trunc20 = trunc i8 %x20 to i4
584  %trunc21 = trunc i8 %x21 to i4
585  %trunc22 = trunc i8 %x22 to i4
586  %trunc23 = trunc i8 %x23 to i4
587  %trunc24 = trunc i8 %x24 to i4
588  %trunc25 = trunc i8 %x25 to i4
589  %trunc26 = trunc i8 %x26 to i4
590  %trunc27 = trunc i8 %x27 to i4
591  %trunc28 = trunc i8 %x28 to i4
592  %trunc29 = trunc i8 %x29 to i4
593  %trunc30 = trunc i8 %x30 to i4
594  %trunc31 = trunc i8 %x31 to i4
595  %ext0  = zext i4 %trunc0  to i8
596  %ext1  = zext i4 %trunc1  to i8
597  %ext2  = zext i4 %trunc2  to i8
598  %ext3  = zext i4 %trunc3  to i8
599  %ext4  = zext i4 %trunc4  to i8
600  %ext5  = zext i4 %trunc5  to i8
601  %ext6  = zext i4 %trunc6  to i8
602  %ext7  = zext i4 %trunc7  to i8
603  %ext8  = zext i4 %trunc8  to i8
604  %ext9  = zext i4 %trunc9  to i8
605  %ext10 = zext i4 %trunc10 to i8
606  %ext11 = zext i4 %trunc11 to i8
607  %ext12 = zext i4 %trunc12 to i8
608  %ext13 = zext i4 %trunc13 to i8
609  %ext14 = zext i4 %trunc14 to i8
610  %ext15 = zext i4 %trunc15 to i8
611  %ext16 = zext i4 %trunc16 to i8
612  %ext17 = zext i4 %trunc17 to i8
613  %ext18 = zext i4 %trunc18 to i8
614  %ext19 = zext i4 %trunc19 to i8
615  %ext20 = zext i4 %trunc20 to i8
616  %ext21 = zext i4 %trunc21 to i8
617  %ext22 = zext i4 %trunc22 to i8
618  %ext23 = zext i4 %trunc23 to i8
619  %ext24 = zext i4 %trunc24 to i8
620  %ext25 = zext i4 %trunc25 to i8
621  %ext26 = zext i4 %trunc26 to i8
622  %ext27 = zext i4 %trunc27 to i8
623  %ext28 = zext i4 %trunc28 to i8
624  %ext29 = zext i4 %trunc29 to i8
625  %ext30 = zext i4 %trunc30 to i8
626  %ext31 = zext i4 %trunc31 to i8
627  %v0  = insertelement <32 x i8> undef, i8 %ext0,  i32 0
628  %v1  = insertelement <32 x i8> %v0,   i8 %ext1,  i32 1
629  %v2  = insertelement <32 x i8> %v1,   i8 %ext2,  i32 2
630  %v3  = insertelement <32 x i8> %v2,   i8 %ext3,  i32 3
631  %v4  = insertelement <32 x i8> %v3,   i8 %ext4,  i32 4
632  %v5  = insertelement <32 x i8> %v4,   i8 %ext5,  i32 5
633  %v6  = insertelement <32 x i8> %v5,   i8 %ext6,  i32 6
634  %v7  = insertelement <32 x i8> %v6,   i8 %ext7,  i32 7
635  %v8  = insertelement <32 x i8> %v7,   i8 %ext8,  i32 8
636  %v9  = insertelement <32 x i8> %v8,   i8 %ext9,  i32 9
637  %v10 = insertelement <32 x i8> %v9,   i8 %ext10, i32 10
638  %v11 = insertelement <32 x i8> %v10,  i8 %ext11, i32 11
639  %v12 = insertelement <32 x i8> %v11,  i8 %ext12, i32 12
640  %v13 = insertelement <32 x i8> %v12,  i8 %ext13, i32 13
641  %v14 = insertelement <32 x i8> %v13,  i8 %ext14, i32 14
642  %v15 = insertelement <32 x i8> %v14,  i8 %ext15, i32 15
643  %v16 = insertelement <32 x i8> %v15,  i8 %ext16, i32 16
644  %v17 = insertelement <32 x i8> %v16,  i8 %ext17, i32 17
645  %v18 = insertelement <32 x i8> %v17,  i8 %ext18, i32 18
646  %v19 = insertelement <32 x i8> %v18,  i8 %ext19, i32 19
647  %v20 = insertelement <32 x i8> %v19,  i8 %ext20, i32 20
648  %v21 = insertelement <32 x i8> %v20,  i8 %ext21, i32 21
649  %v22 = insertelement <32 x i8> %v21,  i8 %ext22, i32 22
650  %v23 = insertelement <32 x i8> %v22,  i8 %ext23, i32 23
651  %v24 = insertelement <32 x i8> %v23,  i8 %ext24, i32 24
652  %v25 = insertelement <32 x i8> %v24,  i8 %ext25, i32 25
653  %v26 = insertelement <32 x i8> %v25,  i8 %ext26, i32 26
654  %v27 = insertelement <32 x i8> %v26,  i8 %ext27, i32 27
655  %v28 = insertelement <32 x i8> %v27,  i8 %ext28, i32 28
656  %v29 = insertelement <32 x i8> %v28,  i8 %ext29, i32 29
657  %v30 = insertelement <32 x i8> %v29,  i8 %ext30, i32 30
658  %v31 = insertelement <32 x i8> %v30,  i8 %ext31, i32 31
659  ret <32 x i8> %v31
660}
661
662define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
663; SSE2-LABEL: _clearupper2xi64b:
664; SSE2:       # %bb.0:
665; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
666; SSE2-NEXT:    retq
667;
668; SSE42-LABEL: _clearupper2xi64b:
669; SSE42:       # %bb.0:
670; SSE42-NEXT:    xorps %xmm1, %xmm1
671; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
672; SSE42-NEXT:    retq
673;
674; AVX-LABEL: _clearupper2xi64b:
675; AVX:       # %bb.0:
676; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
677; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
678; AVX-NEXT:    retq
679  %x32 = bitcast <2 x i64> %0 to <4 x i32>
680  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
681  %r1 = insertelement <4 x i32> %r0,  i32 zeroinitializer, i32 3
682  %r = bitcast <4 x i32> %r1 to <2 x i64>
683  ret <2 x i64> %r
684}
685
686define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
687; SSE2-LABEL: _clearupper4xi64b:
688; SSE2:       # %bb.0:
689; SSE2-NEXT:    movaps {{.*#+}} xmm2
690; SSE2-NEXT:    andps %xmm2, %xmm0
691; SSE2-NEXT:    andps %xmm2, %xmm1
692; SSE2-NEXT:    retq
693;
694; SSE42-LABEL: _clearupper4xi64b:
695; SSE42:       # %bb.0:
696; SSE42-NEXT:    xorps %xmm2, %xmm2
697; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
698; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
699; SSE42-NEXT:    retq
700;
701; AVX-LABEL: _clearupper4xi64b:
702; AVX:       # %bb.0:
703; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
704; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
705; AVX-NEXT:    retq
706  %x32 = bitcast <4 x i64> %0 to <8 x i32>
707  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
708  %r1 = insertelement <8 x i32> %r0,  i32 zeroinitializer, i32 3
709  %r2 = insertelement <8 x i32> %r1,  i32 zeroinitializer, i32 5
710  %r3 = insertelement <8 x i32> %r2,  i32 zeroinitializer, i32 7
711  %r = bitcast <8 x i32> %r3 to <4 x i64>
712  ret <4 x i64> %r
713}
714
715define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
716; SSE2-LABEL: _clearupper4xi32b:
717; SSE2:       # %bb.0:
718; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
719; SSE2-NEXT:    retq
720;
721; SSE42-LABEL: _clearupper4xi32b:
722; SSE42:       # %bb.0:
723; SSE42-NEXT:    pxor %xmm1, %xmm1
724; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
725; SSE42-NEXT:    retq
726;
727; AVX-LABEL: _clearupper4xi32b:
728; AVX:       # %bb.0:
729; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
730; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
731; AVX-NEXT:    retq
732  %x16 = bitcast <4 x i32> %0 to <8 x i16>
733  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
734  %r1 = insertelement <8 x i16> %r0,  i16 zeroinitializer, i32 3
735  %r2 = insertelement <8 x i16> %r1,  i16 zeroinitializer, i32 5
736  %r3 = insertelement <8 x i16> %r2,  i16 zeroinitializer, i32 7
737  %r = bitcast <8 x i16> %r3 to <4 x i32>
738  ret <4 x i32> %r
739}
740
741define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
742; SSE2-LABEL: _clearupper8xi32b:
743; SSE2:       # %bb.0:
744; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
745; SSE2-NEXT:    andps %xmm2, %xmm0
746; SSE2-NEXT:    andps %xmm2, %xmm1
747; SSE2-NEXT:    retq
748;
749; SSE42-LABEL: _clearupper8xi32b:
750; SSE42:       # %bb.0:
751; SSE42-NEXT:    pxor %xmm2, %xmm2
752; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
753; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
754; SSE42-NEXT:    retq
755;
756; AVX1-LABEL: _clearupper8xi32b:
757; AVX1:       # %bb.0:
758; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
759; AVX1-NEXT:    retq
760;
761; AVX2-LABEL: _clearupper8xi32b:
762; AVX2:       # %bb.0:
763; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
764; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
765; AVX2-NEXT:    retq
766  %x16 = bitcast <8 x i32> %0 to <16 x i16>
767  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
768  %r1 = insertelement <16 x i16> %r0,  i16 zeroinitializer, i32 3
769  %r2 = insertelement <16 x i16> %r1,  i16 zeroinitializer, i32 5
770  %r3 = insertelement <16 x i16> %r2,  i16 zeroinitializer, i32 7
771  %r4 = insertelement <16 x i16> %r3,  i16 zeroinitializer, i32 9
772  %r5 = insertelement <16 x i16> %r4,  i16 zeroinitializer, i32 11
773  %r6 = insertelement <16 x i16> %r5,  i16 zeroinitializer, i32 13
774  %r7 = insertelement <16 x i16> %r6,  i16 zeroinitializer, i32 15
775  %r = bitcast <16 x i16> %r7 to <8 x i32>
776  ret <8 x i32> %r
777}
778
779define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
780; SSE-LABEL: _clearupper8xi16b:
781; SSE:       # %bb.0:
782; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
783; SSE-NEXT:    retq
784;
785; AVX-LABEL: _clearupper8xi16b:
786; AVX:       # %bb.0:
787; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
788; AVX-NEXT:    retq
789  %x8 = bitcast <8 x i16> %0 to <16 x i8>
790  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
791  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
792  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
793  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
794  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
795  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
796  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
797  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
798  %r = bitcast <16 x i8> %r7 to <8 x i16>
799  ret <8 x i16> %r
800}
801
802define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
803; SSE-LABEL: _clearupper16xi16b:
804; SSE:       # %bb.0:
805; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
806; SSE-NEXT:    andps %xmm2, %xmm0
807; SSE-NEXT:    andps %xmm2, %xmm1
808; SSE-NEXT:    retq
809;
810; AVX-LABEL: _clearupper16xi16b:
811; AVX:       # %bb.0:
812; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
813; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm2
814; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
815; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
816; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
817; AVX-NEXT:    retq
818  %x8 = bitcast <16 x i16> %0 to <32 x i8>
819  %r0  = insertelement <32 x i8> %x8,  i8 zeroinitializer, i32 1
820  %r1  = insertelement <32 x i8> %r0,  i8 zeroinitializer, i32 3
821  %r2  = insertelement <32 x i8> %r1,  i8 zeroinitializer, i32 5
822  %r3  = insertelement <32 x i8> %r2,  i8 zeroinitializer, i32 7
823  %r4  = insertelement <32 x i8> %r3,  i8 zeroinitializer, i32 9
824  %r5  = insertelement <32 x i8> %r4,  i8 zeroinitializer, i32 11
825  %r6  = insertelement <32 x i8> %r5,  i8 zeroinitializer, i32 13
826  %r7  = insertelement <32 x i8> %r6,  i8 zeroinitializer, i32 15
827  %r8  = insertelement <32 x i8> %r7,  i8 zeroinitializer, i32 17
828  %r9  = insertelement <32 x i8> %r8,  i8 zeroinitializer, i32 19
829  %r10 = insertelement <32 x i8> %r9,  i8 zeroinitializer, i32 21
830  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
831  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
832  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
833  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
834  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
835  %r = bitcast <32 x i8> %r15 to <16 x i16>
836  ret <16 x i16> %r
837}
838
839define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
840; SSE2-LABEL: _clearupper16xi8b:
841; SSE2:       # %bb.0:
842; SSE2-NEXT:    pushq %rbx
843; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
844; SSE2-NEXT:    movq %xmm1, %r10
845; SSE2-NEXT:    movq %r10, %r8
846; SSE2-NEXT:    shrq $56, %r8
847; SSE2-NEXT:    andl $15, %r8d
848; SSE2-NEXT:    movq %r10, %r9
849; SSE2-NEXT:    shrq $48, %r9
850; SSE2-NEXT:    andl $15, %r9d
851; SSE2-NEXT:    movq %r10, %rsi
852; SSE2-NEXT:    shrq $40, %rsi
853; SSE2-NEXT:    andl $15, %esi
854; SSE2-NEXT:    movq %r10, %r11
855; SSE2-NEXT:    shrq $32, %r11
856; SSE2-NEXT:    andl $15, %r11d
857; SSE2-NEXT:    movq %xmm0, %rax
858; SSE2-NEXT:    movq %rax, %rdx
859; SSE2-NEXT:    shrq $56, %rdx
860; SSE2-NEXT:    andl $15, %edx
861; SSE2-NEXT:    movq %rax, %rcx
862; SSE2-NEXT:    shrq $48, %rcx
863; SSE2-NEXT:    andl $15, %ecx
864; SSE2-NEXT:    movq %rax, %rdi
865; SSE2-NEXT:    shrq $40, %rdi
866; SSE2-NEXT:    andl $15, %edi
867; SSE2-NEXT:    movq %rax, %rbx
868; SSE2-NEXT:    shrq $32, %rbx
869; SSE2-NEXT:    andl $15, %ebx
870; SSE2-NEXT:    shlq $32, %rbx
871; SSE2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
872; SSE2-NEXT:    orq %rbx, %rax
873; SSE2-NEXT:    shlq $40, %rdi
874; SSE2-NEXT:    orq %rax, %rdi
875; SSE2-NEXT:    shlq $48, %rcx
876; SSE2-NEXT:    orq %rdi, %rcx
877; SSE2-NEXT:    shlq $56, %rdx
878; SSE2-NEXT:    orq %rcx, %rdx
879; SSE2-NEXT:    shlq $32, %r11
880; SSE2-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
881; SSE2-NEXT:    orq %r11, %r10
882; SSE2-NEXT:    shlq $40, %rsi
883; SSE2-NEXT:    orq %r10, %rsi
884; SSE2-NEXT:    shlq $48, %r9
885; SSE2-NEXT:    orq %rsi, %r9
886; SSE2-NEXT:    shlq $56, %r8
887; SSE2-NEXT:    orq %r9, %r8
888; SSE2-NEXT:    movq %rdx, %xmm0
889; SSE2-NEXT:    movq %r8, %xmm1
890; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
891; SSE2-NEXT:    popq %rbx
892; SSE2-NEXT:    retq
893;
894; SSE42-LABEL: _clearupper16xi8b:
895; SSE42:       # %bb.0:
896; SSE42-NEXT:    pushq %rbx
897; SSE42-NEXT:    pextrq $1, %xmm0, %r10
898; SSE42-NEXT:    movq %r10, %r8
899; SSE42-NEXT:    shrq $56, %r8
900; SSE42-NEXT:    andl $15, %r8d
901; SSE42-NEXT:    movq %r10, %r9
902; SSE42-NEXT:    shrq $48, %r9
903; SSE42-NEXT:    andl $15, %r9d
904; SSE42-NEXT:    movq %r10, %rsi
905; SSE42-NEXT:    shrq $40, %rsi
906; SSE42-NEXT:    andl $15, %esi
907; SSE42-NEXT:    movq %r10, %r11
908; SSE42-NEXT:    shrq $32, %r11
909; SSE42-NEXT:    andl $15, %r11d
910; SSE42-NEXT:    movq %xmm0, %rax
911; SSE42-NEXT:    movq %rax, %rdx
912; SSE42-NEXT:    shrq $56, %rdx
913; SSE42-NEXT:    andl $15, %edx
914; SSE42-NEXT:    movq %rax, %rcx
915; SSE42-NEXT:    shrq $48, %rcx
916; SSE42-NEXT:    andl $15, %ecx
917; SSE42-NEXT:    movq %rax, %rdi
918; SSE42-NEXT:    shrq $40, %rdi
919; SSE42-NEXT:    andl $15, %edi
920; SSE42-NEXT:    movq %rax, %rbx
921; SSE42-NEXT:    shrq $32, %rbx
922; SSE42-NEXT:    andl $15, %ebx
923; SSE42-NEXT:    shlq $32, %rbx
924; SSE42-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
925; SSE42-NEXT:    orq %rbx, %rax
926; SSE42-NEXT:    shlq $40, %rdi
927; SSE42-NEXT:    orq %rax, %rdi
928; SSE42-NEXT:    shlq $48, %rcx
929; SSE42-NEXT:    orq %rdi, %rcx
930; SSE42-NEXT:    shlq $56, %rdx
931; SSE42-NEXT:    orq %rcx, %rdx
932; SSE42-NEXT:    shlq $32, %r11
933; SSE42-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
934; SSE42-NEXT:    orq %r11, %r10
935; SSE42-NEXT:    shlq $40, %rsi
936; SSE42-NEXT:    orq %r10, %rsi
937; SSE42-NEXT:    shlq $48, %r9
938; SSE42-NEXT:    orq %rsi, %r9
939; SSE42-NEXT:    shlq $56, %r8
940; SSE42-NEXT:    orq %r9, %r8
941; SSE42-NEXT:    movq %r8, %xmm1
942; SSE42-NEXT:    movq %rdx, %xmm0
943; SSE42-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
944; SSE42-NEXT:    popq %rbx
945; SSE42-NEXT:    retq
946;
947; AVX-LABEL: _clearupper16xi8b:
948; AVX:       # %bb.0:
949; AVX-NEXT:    pushq %rbx
950; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
951; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
952; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
953; AVX-NEXT:    movq %r9, %r8
954; AVX-NEXT:    shrq $56, %r8
955; AVX-NEXT:    andl $15, %r8d
956; AVX-NEXT:    movq %r9, %r10
957; AVX-NEXT:    shrq $48, %r10
958; AVX-NEXT:    andl $15, %r10d
959; AVX-NEXT:    movq %r9, %rsi
960; AVX-NEXT:    shrq $40, %rsi
961; AVX-NEXT:    andl $15, %esi
962; AVX-NEXT:    movq %r9, %r11
963; AVX-NEXT:    shrq $32, %r11
964; AVX-NEXT:    andl $15, %r11d
965; AVX-NEXT:    movq %rdx, %rdi
966; AVX-NEXT:    shrq $56, %rdi
967; AVX-NEXT:    andl $15, %edi
968; AVX-NEXT:    movq %rdx, %rax
969; AVX-NEXT:    shrq $48, %rax
970; AVX-NEXT:    andl $15, %eax
971; AVX-NEXT:    movq %rdx, %rcx
972; AVX-NEXT:    shrq $40, %rcx
973; AVX-NEXT:    andl $15, %ecx
974; AVX-NEXT:    movq %rdx, %rbx
975; AVX-NEXT:    shrq $32, %rbx
976; AVX-NEXT:    andl $15, %ebx
977; AVX-NEXT:    shlq $32, %rbx
978; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
979; AVX-NEXT:    orq %rbx, %rdx
980; AVX-NEXT:    shlq $40, %rcx
981; AVX-NEXT:    orq %rdx, %rcx
982; AVX-NEXT:    shlq $48, %rax
983; AVX-NEXT:    orq %rcx, %rax
984; AVX-NEXT:    shlq $56, %rdi
985; AVX-NEXT:    orq %rax, %rdi
986; AVX-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
987; AVX-NEXT:    shlq $32, %r11
988; AVX-NEXT:    andl $252645135, %r9d # imm = 0xF0F0F0F
989; AVX-NEXT:    orq %r11, %r9
990; AVX-NEXT:    shlq $40, %rsi
991; AVX-NEXT:    orq %r9, %rsi
992; AVX-NEXT:    shlq $48, %r10
993; AVX-NEXT:    orq %rsi, %r10
994; AVX-NEXT:    shlq $56, %r8
995; AVX-NEXT:    orq %r10, %r8
996; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
997; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
998; AVX-NEXT:    popq %rbx
999; AVX-NEXT:    retq
1000  %x4  = bitcast <16 x i8> %0 to <32 x i4>
1001  %r0  = insertelement <32 x i4> %x4,  i4 zeroinitializer, i32 1
1002  %r1  = insertelement <32 x i4> %r0,  i4 zeroinitializer, i32 3
1003  %r2  = insertelement <32 x i4> %r1,  i4 zeroinitializer, i32 5
1004  %r3  = insertelement <32 x i4> %r2,  i4 zeroinitializer, i32 7
1005  %r4  = insertelement <32 x i4> %r3,  i4 zeroinitializer, i32 9
1006  %r5  = insertelement <32 x i4> %r4,  i4 zeroinitializer, i32 11
1007  %r6  = insertelement <32 x i4> %r5,  i4 zeroinitializer, i32 13
1008  %r7  = insertelement <32 x i4> %r6,  i4 zeroinitializer, i32 15
1009  %r8  = insertelement <32 x i4> %r7,  i4 zeroinitializer, i32 17
1010  %r9  = insertelement <32 x i4> %r8,  i4 zeroinitializer, i32 19
1011  %r10 = insertelement <32 x i4> %r9,  i4 zeroinitializer, i32 21
1012  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
1013  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
1014  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
1015  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
1016  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
1017  %r = bitcast <32 x i4> %r15 to <16 x i8>
1018  ret <16 x i8> %r
1019}
1020
1021define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
1022; SSE2-LABEL: _clearupper32xi8b:
1023; SSE2:       # %bb.0:
1024; SSE2-NEXT:    pushq %rbx
1025; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
1026; SSE2-NEXT:    movq %xmm2, %r10
1027; SSE2-NEXT:    movq %r10, %r8
1028; SSE2-NEXT:    shrq $56, %r8
1029; SSE2-NEXT:    andl $15, %r8d
1030; SSE2-NEXT:    movq %r10, %r9
1031; SSE2-NEXT:    shrq $48, %r9
1032; SSE2-NEXT:    andl $15, %r9d
1033; SSE2-NEXT:    movq %r10, %rsi
1034; SSE2-NEXT:    shrq $40, %rsi
1035; SSE2-NEXT:    andl $15, %esi
1036; SSE2-NEXT:    movq %r10, %r11
1037; SSE2-NEXT:    shrq $32, %r11
1038; SSE2-NEXT:    andl $15, %r11d
1039; SSE2-NEXT:    movq %xmm0, %rax
1040; SSE2-NEXT:    movq %rax, %rdx
1041; SSE2-NEXT:    shrq $56, %rdx
1042; SSE2-NEXT:    andl $15, %edx
1043; SSE2-NEXT:    movq %rax, %rcx
1044; SSE2-NEXT:    shrq $48, %rcx
1045; SSE2-NEXT:    andl $15, %ecx
1046; SSE2-NEXT:    movq %rax, %rdi
1047; SSE2-NEXT:    shrq $40, %rdi
1048; SSE2-NEXT:    andl $15, %edi
1049; SSE2-NEXT:    movq %rax, %rbx
1050; SSE2-NEXT:    shrq $32, %rbx
1051; SSE2-NEXT:    andl $15, %ebx
1052; SSE2-NEXT:    shlq $32, %rbx
1053; SSE2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
1054; SSE2-NEXT:    orq %rbx, %rax
1055; SSE2-NEXT:    shlq $40, %rdi
1056; SSE2-NEXT:    orq %rax, %rdi
1057; SSE2-NEXT:    shlq $48, %rcx
1058; SSE2-NEXT:    orq %rdi, %rcx
1059; SSE2-NEXT:    shlq $56, %rdx
1060; SSE2-NEXT:    orq %rcx, %rdx
1061; SSE2-NEXT:    shlq $32, %r11
1062; SSE2-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
1063; SSE2-NEXT:    orq %r11, %r10
1064; SSE2-NEXT:    shlq $40, %rsi
1065; SSE2-NEXT:    orq %r10, %rsi
1066; SSE2-NEXT:    shlq $48, %r9
1067; SSE2-NEXT:    orq %rsi, %r9
1068; SSE2-NEXT:    shlq $56, %r8
1069; SSE2-NEXT:    orq %r9, %r8
1070; SSE2-NEXT:    movq %rdx, %xmm0
1071; SSE2-NEXT:    movq %r8, %xmm2
1072; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1073; SSE2-NEXT:    popq %rbx
1074; SSE2-NEXT:    retq
1075;
1076; SSE42-LABEL: _clearupper32xi8b:
1077; SSE42:       # %bb.0:
1078; SSE42-NEXT:    pushq %rbx
1079; SSE42-NEXT:    pextrq $1, %xmm0, %r10
1080; SSE42-NEXT:    movq %r10, %r8
1081; SSE42-NEXT:    shrq $56, %r8
1082; SSE42-NEXT:    andl $15, %r8d
1083; SSE42-NEXT:    movq %r10, %r9
1084; SSE42-NEXT:    shrq $48, %r9
1085; SSE42-NEXT:    andl $15, %r9d
1086; SSE42-NEXT:    movq %r10, %rsi
1087; SSE42-NEXT:    shrq $40, %rsi
1088; SSE42-NEXT:    andl $15, %esi
1089; SSE42-NEXT:    movq %r10, %r11
1090; SSE42-NEXT:    shrq $32, %r11
1091; SSE42-NEXT:    andl $15, %r11d
1092; SSE42-NEXT:    movq %xmm0, %rax
1093; SSE42-NEXT:    movq %rax, %rdx
1094; SSE42-NEXT:    shrq $56, %rdx
1095; SSE42-NEXT:    andl $15, %edx
1096; SSE42-NEXT:    movq %rax, %rcx
1097; SSE42-NEXT:    shrq $48, %rcx
1098; SSE42-NEXT:    andl $15, %ecx
1099; SSE42-NEXT:    movq %rax, %rdi
1100; SSE42-NEXT:    shrq $40, %rdi
1101; SSE42-NEXT:    andl $15, %edi
1102; SSE42-NEXT:    movq %rax, %rbx
1103; SSE42-NEXT:    shrq $32, %rbx
1104; SSE42-NEXT:    andl $15, %ebx
1105; SSE42-NEXT:    shlq $32, %rbx
1106; SSE42-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
1107; SSE42-NEXT:    orq %rbx, %rax
1108; SSE42-NEXT:    shlq $40, %rdi
1109; SSE42-NEXT:    orq %rax, %rdi
1110; SSE42-NEXT:    shlq $48, %rcx
1111; SSE42-NEXT:    orq %rdi, %rcx
1112; SSE42-NEXT:    shlq $56, %rdx
1113; SSE42-NEXT:    orq %rcx, %rdx
1114; SSE42-NEXT:    shlq $32, %r11
1115; SSE42-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
1116; SSE42-NEXT:    orq %r11, %r10
1117; SSE42-NEXT:    shlq $40, %rsi
1118; SSE42-NEXT:    orq %r10, %rsi
1119; SSE42-NEXT:    shlq $48, %r9
1120; SSE42-NEXT:    orq %rsi, %r9
1121; SSE42-NEXT:    shlq $56, %r8
1122; SSE42-NEXT:    orq %r9, %r8
1123; SSE42-NEXT:    movq %r8, %xmm2
1124; SSE42-NEXT:    movq %rdx, %xmm0
1125; SSE42-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
1126; SSE42-NEXT:    popq %rbx
1127; SSE42-NEXT:    retq
1128;
1129; AVX1-LABEL: _clearupper32xi8b:
1130; AVX1:       # %bb.0:
1131; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
1132; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
1133; AVX1-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
1134; AVX1-NEXT:    movq %r9, %r8
1135; AVX1-NEXT:    shrq $56, %r8
1136; AVX1-NEXT:    andl $15, %r8d
1137; AVX1-NEXT:    movq %rcx, %rsi
1138; AVX1-NEXT:    movq %rcx, %rdi
1139; AVX1-NEXT:    movq %rcx, %rdx
1140; AVX1-NEXT:    movq %rcx, %rax
1141; AVX1-NEXT:    shrq $32, %rax
1142; AVX1-NEXT:    andl $15, %eax
1143; AVX1-NEXT:    shlq $32, %rax
1144; AVX1-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
1145; AVX1-NEXT:    orq %rax, %rcx
1146; AVX1-NEXT:    movq %r9, %rax
1147; AVX1-NEXT:    shrq $48, %rax
1148; AVX1-NEXT:    andl $15, %eax
1149; AVX1-NEXT:    shrq $40, %rdx
1150; AVX1-NEXT:    andl $15, %edx
1151; AVX1-NEXT:    shlq $40, %rdx
1152; AVX1-NEXT:    orq %rcx, %rdx
1153; AVX1-NEXT:    movq %r9, %rcx
1154; AVX1-NEXT:    shrq $40, %rcx
1155; AVX1-NEXT:    andl $15, %ecx
1156; AVX1-NEXT:    shrq $48, %rdi
1157; AVX1-NEXT:    andl $15, %edi
1158; AVX1-NEXT:    shlq $48, %rdi
1159; AVX1-NEXT:    orq %rdx, %rdi
1160; AVX1-NEXT:    movq %r9, %rdx
1161; AVX1-NEXT:    shrq $32, %rdx
1162; AVX1-NEXT:    andl $15, %edx
1163; AVX1-NEXT:    shrq $56, %rsi
1164; AVX1-NEXT:    andl $15, %esi
1165; AVX1-NEXT:    shlq $56, %rsi
1166; AVX1-NEXT:    orq %rdi, %rsi
1167; AVX1-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
1168; AVX1-NEXT:    shlq $32, %rdx
1169; AVX1-NEXT:    andl $252645135, %r9d # imm = 0xF0F0F0F
1170; AVX1-NEXT:    orq %rdx, %r9
1171; AVX1-NEXT:    shlq $40, %rcx
1172; AVX1-NEXT:    orq %r9, %rcx
1173; AVX1-NEXT:    shlq $48, %rax
1174; AVX1-NEXT:    orq %rcx, %rax
1175; AVX1-NEXT:    shlq $56, %r8
1176; AVX1-NEXT:    orq %rax, %r8
1177; AVX1-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
1178; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1179; AVX1-NEXT:    vmovq %xmm0, %rax
1180; AVX1-NEXT:    movq %rax, %r8
1181; AVX1-NEXT:    movq %rax, %r9
1182; AVX1-NEXT:    movq %rax, %rsi
1183; AVX1-NEXT:    movq %rax, %rdi
1184; AVX1-NEXT:    movl %eax, %ecx
1185; AVX1-NEXT:    movl %eax, %edx
1186; AVX1-NEXT:    vmovd %eax, %xmm1
1187; AVX1-NEXT:    shrl $8, %eax
1188; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
1189; AVX1-NEXT:    shrl $16, %edx
1190; AVX1-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
1191; AVX1-NEXT:    shrl $24, %ecx
1192; AVX1-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
1193; AVX1-NEXT:    shrq $32, %rdi
1194; AVX1-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
1195; AVX1-NEXT:    shrq $40, %rsi
1196; AVX1-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
1197; AVX1-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
1198; AVX1-NEXT:    shrq $48, %r9
1199; AVX1-NEXT:    vpinsrb $6, %r9d, %xmm1, %xmm1
1200; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
1201; AVX1-NEXT:    shrq $56, %r8
1202; AVX1-NEXT:    vpinsrb $7, %r8d, %xmm1, %xmm0
1203; AVX1-NEXT:    movl %eax, %ecx
1204; AVX1-NEXT:    shrl $8, %ecx
1205; AVX1-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
1206; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
1207; AVX1-NEXT:    movl %eax, %ecx
1208; AVX1-NEXT:    shrl $16, %ecx
1209; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
1210; AVX1-NEXT:    movl %eax, %ecx
1211; AVX1-NEXT:    shrl $24, %ecx
1212; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
1213; AVX1-NEXT:    movq %rax, %rcx
1214; AVX1-NEXT:    shrq $32, %rcx
1215; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
1216; AVX1-NEXT:    movq %rax, %rcx
1217; AVX1-NEXT:    shrq $40, %rcx
1218; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1219; AVX1-NEXT:    movq %rax, %rcx
1220; AVX1-NEXT:    shrq $48, %rcx
1221; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
1222; AVX1-NEXT:    vmovq %xmm2, %rcx
1223; AVX1-NEXT:    shrq $56, %rax
1224; AVX1-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
1225; AVX1-NEXT:    movl %ecx, %eax
1226; AVX1-NEXT:    shrl $8, %eax
1227; AVX1-NEXT:    vmovd %ecx, %xmm1
1228; AVX1-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
1229; AVX1-NEXT:    movl %ecx, %eax
1230; AVX1-NEXT:    shrl $16, %eax
1231; AVX1-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
1232; AVX1-NEXT:    movl %ecx, %eax
1233; AVX1-NEXT:    shrl $24, %eax
1234; AVX1-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
1235; AVX1-NEXT:    movq %rcx, %rax
1236; AVX1-NEXT:    shrq $32, %rax
1237; AVX1-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
1238; AVX1-NEXT:    movq %rcx, %rax
1239; AVX1-NEXT:    shrq $40, %rax
1240; AVX1-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
1241; AVX1-NEXT:    movq %rcx, %rax
1242; AVX1-NEXT:    shrq $48, %rax
1243; AVX1-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
1244; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
1245; AVX1-NEXT:    shrq $56, %rcx
1246; AVX1-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
1247; AVX1-NEXT:    movl %eax, %ecx
1248; AVX1-NEXT:    shrl $8, %ecx
1249; AVX1-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
1250; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
1251; AVX1-NEXT:    movl %eax, %ecx
1252; AVX1-NEXT:    shrl $16, %ecx
1253; AVX1-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
1254; AVX1-NEXT:    movl %eax, %ecx
1255; AVX1-NEXT:    shrl $24, %ecx
1256; AVX1-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
1257; AVX1-NEXT:    movq %rax, %rcx
1258; AVX1-NEXT:    shrq $32, %rcx
1259; AVX1-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
1260; AVX1-NEXT:    movq %rax, %rcx
1261; AVX1-NEXT:    shrq $40, %rcx
1262; AVX1-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
1263; AVX1-NEXT:    movq %rax, %rcx
1264; AVX1-NEXT:    shrq $48, %rcx
1265; AVX1-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
1266; AVX1-NEXT:    shrq $56, %rax
1267; AVX1-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
1268; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1269; AVX1-NEXT:    retq
1270;
1271; AVX2-LABEL: _clearupper32xi8b:
1272; AVX2:       # %bb.0:
1273; AVX2-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
1274; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
1275; AVX2-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
1276; AVX2-NEXT:    movq %r9, %r8
1277; AVX2-NEXT:    shrq $56, %r8
1278; AVX2-NEXT:    andl $15, %r8d
1279; AVX2-NEXT:    movq %rcx, %rsi
1280; AVX2-NEXT:    movq %rcx, %rdi
1281; AVX2-NEXT:    movq %rcx, %rdx
1282; AVX2-NEXT:    movq %rcx, %rax
1283; AVX2-NEXT:    shrq $32, %rax
1284; AVX2-NEXT:    andl $15, %eax
1285; AVX2-NEXT:    shlq $32, %rax
1286; AVX2-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
1287; AVX2-NEXT:    orq %rax, %rcx
1288; AVX2-NEXT:    movq %r9, %rax
1289; AVX2-NEXT:    shrq $48, %rax
1290; AVX2-NEXT:    andl $15, %eax
1291; AVX2-NEXT:    shrq $40, %rdx
1292; AVX2-NEXT:    andl $15, %edx
1293; AVX2-NEXT:    shlq $40, %rdx
1294; AVX2-NEXT:    orq %rcx, %rdx
1295; AVX2-NEXT:    movq %r9, %rcx
1296; AVX2-NEXT:    shrq $40, %rcx
1297; AVX2-NEXT:    andl $15, %ecx
1298; AVX2-NEXT:    shrq $48, %rdi
1299; AVX2-NEXT:    andl $15, %edi
1300; AVX2-NEXT:    shlq $48, %rdi
1301; AVX2-NEXT:    orq %rdx, %rdi
1302; AVX2-NEXT:    movq %r9, %rdx
1303; AVX2-NEXT:    shrq $32, %rdx
1304; AVX2-NEXT:    andl $15, %edx
1305; AVX2-NEXT:    shrq $56, %rsi
1306; AVX2-NEXT:    andl $15, %esi
1307; AVX2-NEXT:    shlq $56, %rsi
1308; AVX2-NEXT:    orq %rdi, %rsi
1309; AVX2-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
1310; AVX2-NEXT:    shlq $32, %rdx
1311; AVX2-NEXT:    andl $252645135, %r9d # imm = 0xF0F0F0F
1312; AVX2-NEXT:    orq %rdx, %r9
1313; AVX2-NEXT:    shlq $40, %rcx
1314; AVX2-NEXT:    orq %r9, %rcx
1315; AVX2-NEXT:    shlq $48, %rax
1316; AVX2-NEXT:    orq %rcx, %rax
1317; AVX2-NEXT:    shlq $56, %r8
1318; AVX2-NEXT:    orq %rax, %r8
1319; AVX2-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
1320; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
1321; AVX2-NEXT:    vmovq %xmm0, %rax
1322; AVX2-NEXT:    movq %rax, %r8
1323; AVX2-NEXT:    movq %rax, %r9
1324; AVX2-NEXT:    movq %rax, %rsi
1325; AVX2-NEXT:    movq %rax, %rdi
1326; AVX2-NEXT:    movl %eax, %ecx
1327; AVX2-NEXT:    movl %eax, %edx
1328; AVX2-NEXT:    vmovd %eax, %xmm1
1329; AVX2-NEXT:    shrl $8, %eax
1330; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
1331; AVX2-NEXT:    shrl $16, %edx
1332; AVX2-NEXT:    vpinsrb $2, %edx, %xmm1, %xmm1
1333; AVX2-NEXT:    shrl $24, %ecx
1334; AVX2-NEXT:    vpinsrb $3, %ecx, %xmm1, %xmm1
1335; AVX2-NEXT:    shrq $32, %rdi
1336; AVX2-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
1337; AVX2-NEXT:    shrq $40, %rsi
1338; AVX2-NEXT:    vpinsrb $5, %esi, %xmm1, %xmm1
1339; AVX2-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm2
1340; AVX2-NEXT:    shrq $48, %r9
1341; AVX2-NEXT:    vpinsrb $6, %r9d, %xmm1, %xmm1
1342; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
1343; AVX2-NEXT:    shrq $56, %r8
1344; AVX2-NEXT:    vpinsrb $7, %r8d, %xmm1, %xmm0
1345; AVX2-NEXT:    movl %eax, %ecx
1346; AVX2-NEXT:    shrl $8, %ecx
1347; AVX2-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
1348; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm0, %xmm0
1349; AVX2-NEXT:    movl %eax, %ecx
1350; AVX2-NEXT:    shrl $16, %ecx
1351; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm0, %xmm0
1352; AVX2-NEXT:    movl %eax, %ecx
1353; AVX2-NEXT:    shrl $24, %ecx
1354; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
1355; AVX2-NEXT:    movq %rax, %rcx
1356; AVX2-NEXT:    shrq $32, %rcx
1357; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm0, %xmm0
1358; AVX2-NEXT:    movq %rax, %rcx
1359; AVX2-NEXT:    shrq $40, %rcx
1360; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
1361; AVX2-NEXT:    movq %rax, %rcx
1362; AVX2-NEXT:    shrq $48, %rcx
1363; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
1364; AVX2-NEXT:    vmovq %xmm2, %rcx
1365; AVX2-NEXT:    shrq $56, %rax
1366; AVX2-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
1367; AVX2-NEXT:    movl %ecx, %eax
1368; AVX2-NEXT:    shrl $8, %eax
1369; AVX2-NEXT:    vmovd %ecx, %xmm1
1370; AVX2-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
1371; AVX2-NEXT:    movl %ecx, %eax
1372; AVX2-NEXT:    shrl $16, %eax
1373; AVX2-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
1374; AVX2-NEXT:    movl %ecx, %eax
1375; AVX2-NEXT:    shrl $24, %eax
1376; AVX2-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
1377; AVX2-NEXT:    movq %rcx, %rax
1378; AVX2-NEXT:    shrq $32, %rax
1379; AVX2-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
1380; AVX2-NEXT:    movq %rcx, %rax
1381; AVX2-NEXT:    shrq $40, %rax
1382; AVX2-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
1383; AVX2-NEXT:    movq %rcx, %rax
1384; AVX2-NEXT:    shrq $48, %rax
1385; AVX2-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
1386; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
1387; AVX2-NEXT:    shrq $56, %rcx
1388; AVX2-NEXT:    vpinsrb $7, %ecx, %xmm1, %xmm1
1389; AVX2-NEXT:    movl %eax, %ecx
1390; AVX2-NEXT:    shrl $8, %ecx
1391; AVX2-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
1392; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm1, %xmm1
1393; AVX2-NEXT:    movl %eax, %ecx
1394; AVX2-NEXT:    shrl $16, %ecx
1395; AVX2-NEXT:    vpinsrb $10, %ecx, %xmm1, %xmm1
1396; AVX2-NEXT:    movl %eax, %ecx
1397; AVX2-NEXT:    shrl $24, %ecx
1398; AVX2-NEXT:    vpinsrb $11, %ecx, %xmm1, %xmm1
1399; AVX2-NEXT:    movq %rax, %rcx
1400; AVX2-NEXT:    shrq $32, %rcx
1401; AVX2-NEXT:    vpinsrb $12, %ecx, %xmm1, %xmm1
1402; AVX2-NEXT:    movq %rax, %rcx
1403; AVX2-NEXT:    shrq $40, %rcx
1404; AVX2-NEXT:    vpinsrb $13, %ecx, %xmm1, %xmm1
1405; AVX2-NEXT:    movq %rax, %rcx
1406; AVX2-NEXT:    shrq $48, %rcx
1407; AVX2-NEXT:    vpinsrb $14, %ecx, %xmm1, %xmm1
1408; AVX2-NEXT:    shrq $56, %rax
1409; AVX2-NEXT:    vpinsrb $15, %eax, %xmm1, %xmm1
1410; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
1411; AVX2-NEXT:    retq
1412  %x4  = bitcast <32 x i8> %0 to <64 x i4>
1413  %r0  = insertelement <64 x i4> %x4,  i4 zeroinitializer, i32 1
1414  %r1  = insertelement <64 x i4> %r0,  i4 zeroinitializer, i32 3
1415  %r2  = insertelement <64 x i4> %r1,  i4 zeroinitializer, i32 5
1416  %r3  = insertelement <64 x i4> %r2,  i4 zeroinitializer, i32 7
1417  %r4  = insertelement <64 x i4> %r3,  i4 zeroinitializer, i32 9
1418  %r5  = insertelement <64 x i4> %r4,  i4 zeroinitializer, i32 11
1419  %r6  = insertelement <64 x i4> %r5,  i4 zeroinitializer, i32 13
1420  %r7  = insertelement <64 x i4> %r6,  i4 zeroinitializer, i32 15
1421  %r8  = insertelement <64 x i4> %r7,  i4 zeroinitializer, i32 17
1422  %r9  = insertelement <64 x i4> %r8,  i4 zeroinitializer, i32 19
1423  %r10 = insertelement <64 x i4> %r9,  i4 zeroinitializer, i32 21
1424  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
1425  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
1426  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
1427  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
1428  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
1429  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
1430  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
1431  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
1432  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
1433  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
1434  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
1435  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
1436  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
1437  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
1438  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
1439  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
1440  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
1441  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
1442  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
1443  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
1444  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
1445  %r = bitcast <64 x i4> %r15 to <32 x i8>
1446  ret <32 x i8> %r
1447}
1448
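;
; The remaining _clearupper*c tests express the same clear-upper-bits pattern
; directly as an 'and' with a constant mask, so they are expected to lower to a
; single masked and/blend rather than per-element insertion.
;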
1449define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
1450; SSE2-LABEL: _clearupper2xi64c:
1451; SSE2:       # %bb.0:
1452; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1453; SSE2-NEXT:    retq
1454;
1455; SSE42-LABEL: _clearupper2xi64c:
1456; SSE42:       # %bb.0:
1457; SSE42-NEXT:    xorps %xmm1, %xmm1
1458; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1459; SSE42-NEXT:    retq
1460;
1461; AVX-LABEL: _clearupper2xi64c:
1462; AVX:       # %bb.0:
1463; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1464; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
1465; AVX-NEXT:    retq
1466  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
1467  ret <2 x i64> %r
1468}
1469
1470define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
1471; SSE2-LABEL: _clearupper4xi64c:
1472; SSE2:       # %bb.0:
1473; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
1474; SSE2-NEXT:    andps %xmm2, %xmm0
1475; SSE2-NEXT:    andps %xmm2, %xmm1
1476; SSE2-NEXT:    retq
1477;
1478; SSE42-LABEL: _clearupper4xi64c:
1479; SSE42:       # %bb.0:
1480; SSE42-NEXT:    xorps %xmm2, %xmm2
1481; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
1482; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1483; SSE42-NEXT:    retq
1484;
1485; AVX-LABEL: _clearupper4xi64c:
1486; AVX:       # %bb.0:
1487; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
1488; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
1489; AVX-NEXT:    retq
1490  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
1491  ret <4 x i64> %r
1492}
1493
1494define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
1495; SSE2-LABEL: _clearupper4xi32c:
1496; SSE2:       # %bb.0:
1497; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
1498; SSE2-NEXT:    retq
1499;
1500; SSE42-LABEL: _clearupper4xi32c:
1501; SSE42:       # %bb.0:
1502; SSE42-NEXT:    pxor %xmm1, %xmm1
1503; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1504; SSE42-NEXT:    retq
1505;
1506; AVX-LABEL: _clearupper4xi32c:
1507; AVX:       # %bb.0:
1508; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1509; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
1510; AVX-NEXT:    retq
1511  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
1512  ret <4 x i32> %r
1513}
1514
1515define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
1516; SSE2-LABEL: _clearupper8xi32c:
1517; SSE2:       # %bb.0:
1518; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
1519; SSE2-NEXT:    andps %xmm2, %xmm0
1520; SSE2-NEXT:    andps %xmm2, %xmm1
1521; SSE2-NEXT:    retq
1522;
1523; SSE42-LABEL: _clearupper8xi32c:
1524; SSE42:       # %bb.0:
1525; SSE42-NEXT:    pxor %xmm2, %xmm2
1526; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
1527; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
1528; SSE42-NEXT:    retq
1529;
1530; AVX1-LABEL: _clearupper8xi32c:
1531; AVX1:       # %bb.0:
1532; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1533; AVX1-NEXT:    retq
1534;
1535; AVX2-LABEL: _clearupper8xi32c:
1536; AVX2:       # %bb.0:
1537; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
1538; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
1539; AVX2-NEXT:    retq
1540  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
1541  ret <8 x i32> %r
1542}
1543
1544define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
1545; SSE-LABEL: _clearupper8xi16c:
1546; SSE:       # %bb.0:
1547; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
1548; SSE-NEXT:    retq
1549;
1550; AVX-LABEL: _clearupper8xi16c:
1551; AVX:       # %bb.0:
1552; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
1553; AVX-NEXT:    retq
1554  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
1555  ret <8 x i16> %r
1556}
1557
1558define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
1559; SSE-LABEL: _clearupper16xi16c:
1560; SSE:       # %bb.0:
1561; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
1562; SSE-NEXT:    andps %xmm2, %xmm0
1563; SSE-NEXT:    andps %xmm2, %xmm1
1564; SSE-NEXT:    retq
1565;
1566; AVX-LABEL: _clearupper16xi16c:
1567; AVX:       # %bb.0:
1568; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1569; AVX-NEXT:    retq
1570  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
1571  ret <16 x i16> %r
1572}
1573
1574define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
1575; SSE-LABEL: _clearupper16xi8c:
1576; SSE:       # %bb.0:
1577; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
1578; SSE-NEXT:    retq
1579;
1580; AVX-LABEL: _clearupper16xi8c:
1581; AVX:       # %bb.0:
1582; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
1583; AVX-NEXT:    retq
1584  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
1585  ret <16 x i8> %r
1586}
1587
1588define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
1589; SSE-LABEL: _clearupper32xi8c:
1590; SSE:       # %bb.0:
1591; SSE-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1592; SSE-NEXT:    andps %xmm2, %xmm0
1593; SSE-NEXT:    andps %xmm2, %xmm1
1594; SSE-NEXT:    retq
1595;
1596; AVX-LABEL: _clearupper32xi8c:
1597; AVX:       # %bb.0:
1598; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
1599; AVX-NEXT:    retq
1600  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
1601  ret <32 x i8> %r
1602}
1603