1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
4
; foo: convert a <3 x float> to <3 x i8> with fptoui, widen it to <4 x i8>
; (the shuffle leaves lane 3 undef), overwrite lane 3 with i8 -1 and store the
; resulting 4-byte vector to %out.
; Expected code: with SSE2 the converted vector is spilled to the stack and
; reassembled with scalar integer ops plus pinsrw; with SSE4.1 a pshufb packs
; bytes 0, 4 and 8 and pinsrb $3 writes the constant 255.
5define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
6; SSE2-LABEL: foo:
7; SSE2:       # %bb.0:
8; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
9; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
10; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
11; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
12; SSE2-NEXT:    shll $8, %ecx
13; SSE2-NEXT:    orl %eax, %ecx
14; SSE2-NEXT:    movd %ecx, %xmm0
15; SSE2-NEXT:    movl $65280, %eax # imm = 0xFF00
16; SSE2-NEXT:    orl -{{[0-9]+}}(%rsp), %eax
17; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
18; SSE2-NEXT:    movd %xmm0, (%rdi)
19; SSE2-NEXT:    retq
20;
21; SSE41-LABEL: foo:
22; SSE41:       # %bb.0:
23; SSE41-NEXT:    cvttps2dq %xmm0, %xmm0
24; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
25; SSE41-NEXT:    movl $255, %eax
26; SSE41-NEXT:    pinsrb $3, %eax, %xmm0
27; SSE41-NEXT:    movd %xmm0, (%rdi)
28; SSE41-NEXT:    retq
29  %t0 = fptoui <3 x float> %in to <3 x i8>
30  %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  ; i8 -1 is the all-ones byte; the expected code above materializes it as
  ; 255 (0xFF00 in the SSE2 sequence).
31  %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
32  store <4 x i8> %t2, <4 x i8>* %out, align 4
33  ret void
34}
35
36; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
37; blend with a zero vector if the build_vector contains negative zero.
38
; test_negative_zero_1: build <A[0], -0.0, A[2], +0.0> from %A and two
; floating-point constants. Lane 1 is negative zero, lane 3 is positive zero.
; In the expected SSE4.1 output only lane 3 uses the zeroing form of insertps;
; lane 1 is inserted from a memory operand. The expected SSE2 output likewise
; loads lane 1 with movss from memory and uses xorps only for lane 3.
39define <4 x float> @test_negative_zero_1(<4 x float> %A) {
40; SSE2-LABEL: test_negative_zero_1:
41; SSE2:       # %bb.0: # %entry
42; SSE2-NEXT:    movaps %xmm0, %xmm1
43; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
44; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
45; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
46; SSE2-NEXT:    xorps %xmm2, %xmm2
47; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
48; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
49; SSE2-NEXT:    retq
50;
51; SSE41-LABEL: test_negative_zero_1:
52; SSE41:       # %bb.0: # %entry
53; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
54; SSE41-NEXT:    retq
55entry:
56  %0 = extractelement <4 x float> %A, i32 0
57  %1 = insertelement <4 x float> undef, float %0, i32 0
58  %2 = insertelement <4 x float> %1, float -0.0, i32 1
59  %3 = extractelement <4 x float> %A, i32 2
60  %4 = insertelement <4 x float> %2, float %3, i32 2
61  %5 = insertelement <4 x float> %4, float 0.0, i32 3
62  ret <4 x float> %5
63}
64
65; FIXME: This could be 'movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]'.
66
; test_negative_zero_2: build the <2 x double> vector <A[0], -0.0>.
; Both expected outputs take lane 1 from a memory operand rather than from a
; zeroed register: shufpd with SSE2, blendps with SSE4.1.
67define <2 x double> @test_negative_zero_2(<2 x double> %A) {
68; SSE2-LABEL: test_negative_zero_2:
69; SSE2:       # %bb.0: # %entry
70; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
71; SSE2-NEXT:    retq
72;
73; SSE41-LABEL: test_negative_zero_2:
74; SSE41:       # %bb.0: # %entry
75; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
76; SSE41-NEXT:    retq
77entry:
78  %0 = extractelement <2 x double> %A, i32 0
79  %1 = insertelement <2 x double> undef, double %0, i32 0
80  %2 = insertelement <2 x double> %1, double -0.0, i32 1
81  ret <2 x double> %2
82}
83
; test_buildvector_v4f32_register: build a <4 x float> from four scalar float
; arguments; %f0..%f3 are inserted into lanes 0..3 in order.
; Expected code: two unpcklps plus a movlhps with SSE2; a chain of three
; insertps with SSE4.1.
84define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
85; SSE2-LABEL: test_buildvector_v4f32_register:
86; SSE2:       # %bb.0:
87; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
88; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
89; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
90; SSE2-NEXT:    retq
91;
92; SSE41-LABEL: test_buildvector_v4f32_register:
93; SSE41:       # %bb.0:
94; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
95; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
96; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
97; SSE41-NEXT:    retq
98  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
99  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
100  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
101  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
102  ret <4 x float> %ins3
103}
104
; test_buildvector_v4f32_load: build a <4 x float> whose four lanes are each
; loaded (align 4) from a separate pointer argument %p0..%p3.
; Expected code: with SSE2, four movss loads merged by unpcklps/movlhps; with
; SSE4.1, one movss load followed by three insertps that take memory operands.
105define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %p2, float* %p3) {
106; SSE2-LABEL: test_buildvector_v4f32_load:
107; SSE2:       # %bb.0:
108; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
109; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
110; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
111; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
112; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
113; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
114; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
115; SSE2-NEXT:    retq
116;
117; SSE41-LABEL: test_buildvector_v4f32_load:
118; SSE41:       # %bb.0:
119; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
120; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
121; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
122; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
123; SSE41-NEXT:    retq
124  %f0 = load float, float* %p0, align 4
125  %f1 = load float, float* %p1, align 4
126  %f2 = load float, float* %p2, align 4
127  %f3 = load float, float* %p3, align 4
128  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
129  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
130  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
131  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
132  ret <4 x float> %ins3
133}
134
; test_buildvector_v4f32_partial_load: build a <4 x float> with lanes 0..2
; taken from the scalar arguments %f0..%f2 and lane 3 loaded from %p3.
; Expected code: with SSE2, a movss load feeds the unpcklps/movlhps merge; with
; SSE4.1, the last of three insertps takes its source from memory.
135define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
136; SSE2-LABEL: test_buildvector_v4f32_partial_load:
137; SSE2:       # %bb.0:
138; SSE2-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
139; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
140; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
141; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
142; SSE2-NEXT:    retq
143;
144; SSE41-LABEL: test_buildvector_v4f32_partial_load:
145; SSE41:       # %bb.0:
146; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
147; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
148; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
149; SSE41-NEXT:    retq
150  %f3 = load float, float* %p3, align 4
151  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
152  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
153  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
154  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
155  ret <4 x float> %ins3
156}
157
; test_buildvector_v4i32_register: build a <4 x i32> from four i32 arguments;
; %a0..%a3 are inserted into lanes 0..3 in order.
; Expected code: with SSE2, each argument is moved into an xmm register with
; movd and merged by punpckldq/punpcklqdq; with SSE4.1, a movd for lane 0 is
; followed by pinsrd for lanes 1..3.
158define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
159; SSE2-LABEL: test_buildvector_v4i32_register:
160; SSE2:       # %bb.0:
161; SSE2-NEXT:    movd %ecx, %xmm0
162; SSE2-NEXT:    movd %edx, %xmm1
163; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
164; SSE2-NEXT:    movd %esi, %xmm2
165; SSE2-NEXT:    movd %edi, %xmm0
166; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
167; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
168; SSE2-NEXT:    retq
169;
170; SSE41-LABEL: test_buildvector_v4i32_register:
171; SSE41:       # %bb.0:
172; SSE41-NEXT:    movd %edi, %xmm0
173; SSE41-NEXT:    pinsrd $1, %esi, %xmm0
174; SSE41-NEXT:    pinsrd $2, %edx, %xmm0
175; SSE41-NEXT:    pinsrd $3, %ecx, %xmm0
176; SSE41-NEXT:    retq
177  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
178  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
179  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
180  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
181  ret <4 x i32> %ins3
182}
183
; test_buildvector_v4i32_partial: build a <4 x i32> in which only lanes 0 and 3
; are defined (%a0 and %a3); lanes 1 and 2 are explicitly undef.
; Expected code: with SSE2, two movd plus pshufd and punpcklqdq; with SSE4.1,
; a movd for lane 0 and a single pinsrd $3.
184define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
185; SSE2-LABEL: test_buildvector_v4i32_partial:
186; SSE2:       # %bb.0:
187; SSE2-NEXT:    movd %edi, %xmm0
188; SSE2-NEXT:    movd %esi, %xmm1
189; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
190; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
191; SSE2-NEXT:    retq
192;
193; SSE41-LABEL: test_buildvector_v4i32_partial:
194; SSE41:       # %bb.0:
195; SSE41-NEXT:    movd %edi, %xmm0
196; SSE41-NEXT:    pinsrd $3, %esi, %xmm0
197; SSE41-NEXT:    retq
198  %ins0 = insertelement <4 x i32> undef, i32   %a0, i32 0
199  %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1
200  %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2
201  %ins3 = insertelement <4 x i32> %ins2, i32   %a3, i32 3
202  ret <4 x i32> %ins3
203}
204
; test_buildvector_v4i32_register_zero: build a <4 x i32> with lanes 0, 2 and 3
; taken from %a0, %a2 and %a3 and lane 1 set to the constant 0.
; The expected lines use the prefix shared by both llc configurations, so the
; SSE2 and SSE4.1 outputs are expected to be identical: three movd merged by
; punpckldq and punpcklqdq.
205define <4 x i32> @test_buildvector_v4i32_register_zero(i32 %a0, i32 %a2, i32 %a3) {
206; CHECK-LABEL: test_buildvector_v4i32_register_zero:
207; CHECK:       # %bb.0:
208; CHECK-NEXT:    movd %edx, %xmm0
209; CHECK-NEXT:    movd %esi, %xmm1
210; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
211; CHECK-NEXT:    movd %edi, %xmm0
212; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
213; CHECK-NEXT:    retq
214  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
215  %ins1 = insertelement <4 x i32> %ins0, i32   0, i32 1
216  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
217  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
218  ret <4 x i32> %ins3
219}
220
; test_buildvector_v4i32_register_zero_2: build a <4 x i32> with lane 0 set to
; the constant 0 and lanes 1..3 taken from %a1, %a2 and %a3.
; The expected lines use the prefix shared by both llc configurations: three
; movd, a punpckldq for the upper pair, and a final shufps that places %a1 in
; lane 1.
221define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %a3) {
222; CHECK-LABEL: test_buildvector_v4i32_register_zero_2:
223; CHECK:       # %bb.0:
224; CHECK-NEXT:    movd %edx, %xmm0
225; CHECK-NEXT:    movd %esi, %xmm1
226; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
227; CHECK-NEXT:    movd %edi, %xmm0
228; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1]
229; CHECK-NEXT:    retq
230  %ins0 = insertelement <4 x i32> undef, i32   0, i32 0
231  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
232  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
233  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
234  ret <4 x i32> %ins3
235}
236
; test_buildvector_v8i16_register: build an <8 x i16> from eight i16 arguments;
; %a0..%a7 are inserted into lanes 0..7 in order.
; Expected code: with SSE2, a tree of punpcklwd/punpckldq/punpcklqdq over movd
; results, two of which are loaded from memory; with SSE4.1, a movd for lane 0
; followed by pinsrw $1..$7, where lanes 6 and 7 use stack-relative memory
; operands.
237define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
238; SSE2-LABEL: test_buildvector_v8i16_register:
239; SSE2:       # %bb.0:
240; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
241; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
242; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
243; SSE2-NEXT:    movd %r9d, %xmm0
244; SSE2-NEXT:    movd %r8d, %xmm2
245; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
246; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
247; SSE2-NEXT:    movd %ecx, %xmm0
248; SSE2-NEXT:    movd %edx, %xmm1
249; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
250; SSE2-NEXT:    movd %esi, %xmm3
251; SSE2-NEXT:    movd %edi, %xmm0
252; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
253; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
254; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
255; SSE2-NEXT:    retq
256;
257; SSE41-LABEL: test_buildvector_v8i16_register:
258; SSE41:       # %bb.0:
259; SSE41-NEXT:    movd %edi, %xmm0
260; SSE41-NEXT:    pinsrw $1, %esi, %xmm0
261; SSE41-NEXT:    pinsrw $2, %edx, %xmm0
262; SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
263; SSE41-NEXT:    pinsrw $4, %r8d, %xmm0
264; SSE41-NEXT:    pinsrw $5, %r9d, %xmm0
265; SSE41-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
266; SSE41-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
267; SSE41-NEXT:    retq
268  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
269  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
270  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
271  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
272  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
273  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
274  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
275  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
276  ret <8 x i16> %ins7
277}
278
; test_buildvector_v8i16_partial: build an <8 x i16> in which only lanes 1, 3,
; 4 and 5 are defined (%a1, %a3, %a4, %a5); lanes 0, 2, 6 and 7 are explicitly
; undef.
; The expected lines use the prefix shared by both llc configurations: a pxor
; of xmm0 with itself followed by one pinsrw per defined lane.
279define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
280; CHECK-LABEL: test_buildvector_v8i16_partial:
281; CHECK:       # %bb.0:
282; CHECK-NEXT:    pxor %xmm0, %xmm0
283; CHECK-NEXT:    pinsrw $1, %edi, %xmm0
284; CHECK-NEXT:    pinsrw $3, %esi, %xmm0
285; CHECK-NEXT:    pinsrw $4, %edx, %xmm0
286; CHECK-NEXT:    pinsrw $5, %ecx, %xmm0
287; CHECK-NEXT:    retq
288  %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0
289  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
290  %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2
291  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
292  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
293  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
294  %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6
295  %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7
296  ret <8 x i16> %ins7
297}
298
; test_buildvector_v8i16_register_zero: build an <8 x i16> with lanes 0, 3, 4
; and 5 taken from %a0, %a3, %a4 and %a5 and lanes 1, 2, 6 and 7 set to the
; constant 0.
; The expected lines use the prefix shared by both llc configurations: lane 0
; is zero-extended with movzwl and moved in with movd, then lanes 3..5 are
; inserted with pinsrw.
299define <8 x i16> @test_buildvector_v8i16_register_zero(i16 %a0, i16 %a3, i16 %a4, i16 %a5) {
300; CHECK-LABEL: test_buildvector_v8i16_register_zero:
301; CHECK:       # %bb.0:
302; CHECK-NEXT:    movzwl %di, %eax
303; CHECK-NEXT:    movd %eax, %xmm0
304; CHECK-NEXT:    pinsrw $3, %esi, %xmm0
305; CHECK-NEXT:    pinsrw $4, %edx, %xmm0
306; CHECK-NEXT:    pinsrw $5, %ecx, %xmm0
307; CHECK-NEXT:    retq
308  %ins0 = insertelement <8 x i16> undef, i16   %a0, i32 0
309  %ins1 = insertelement <8 x i16> %ins0, i16     0, i32 1
310  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
311  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
312  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
313  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
314  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
315  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
316  ret <8 x i16> %ins7
317}
318
; test_buildvector_v8i16_register_zero_2: build an <8 x i16> with lanes 1, 3, 4
; and 5 taken from %a1, %a3, %a4 and %a5 and lanes 0, 2, 6 and 7 set to the
; constant 0.
; The expected lines use the prefix shared by both llc configurations: a pxor
; of xmm0 with itself followed by one pinsrw per non-constant lane.
319define <8 x i16> @test_buildvector_v8i16_register_zero_2(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
320; CHECK-LABEL: test_buildvector_v8i16_register_zero_2:
321; CHECK:       # %bb.0:
322; CHECK-NEXT:    pxor %xmm0, %xmm0
323; CHECK-NEXT:    pinsrw $1, %edi, %xmm0
324; CHECK-NEXT:    pinsrw $3, %esi, %xmm0
325; CHECK-NEXT:    pinsrw $4, %edx, %xmm0
326; CHECK-NEXT:    pinsrw $5, %ecx, %xmm0
327; CHECK-NEXT:    retq
328  %ins0 = insertelement <8 x i16> undef, i16     0, i32 0
329  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
330  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
331  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
332  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
333  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
334  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
335  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
336  ret <8 x i16> %ins7
337}
338
; test_buildvector_v16i8_register: build a <16 x i8> from sixteen i8 arguments;
; %a0..%a15 are inserted into lanes 0..15 in order.
; Expected code: with SSE2, a tree of punpcklbw/punpcklwd/punpckldq/punpcklqdq
; over sixteen movd results, ten of which are loaded from memory; with SSE4.1,
; a movd for lane 0 followed by pinsrb $1..$15, where lanes 6..15 use
; stack-relative memory operands.
339define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
340; SSE2-LABEL: test_buildvector_v16i8_register:
341; SSE2:       # %bb.0:
342; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
343; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
344; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
345; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
346; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
347; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
348; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
349; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
350; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
351; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
352; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
353; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
354; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
355; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
356; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
357; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
358; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
359; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
360; SSE2-NEXT:    movd %r9d, %xmm0
361; SSE2-NEXT:    movd %r8d, %xmm2
362; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
363; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
364; SSE2-NEXT:    movd %ecx, %xmm0
365; SSE2-NEXT:    movd %edx, %xmm1
366; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
367; SSE2-NEXT:    movd %esi, %xmm4
368; SSE2-NEXT:    movd %edi, %xmm0
369; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
370; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
371; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
372; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
373; SSE2-NEXT:    retq
374;
375; SSE41-LABEL: test_buildvector_v16i8_register:
376; SSE41:       # %bb.0:
377; SSE41-NEXT:    movd %edi, %xmm0
378; SSE41-NEXT:    pinsrb $1, %esi, %xmm0
379; SSE41-NEXT:    pinsrb $2, %edx, %xmm0
380; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
381; SSE41-NEXT:    pinsrb $4, %r8d, %xmm0
382; SSE41-NEXT:    pinsrb $5, %r9d, %xmm0
383; SSE41-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
384; SSE41-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
385; SSE41-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
386; SSE41-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
387; SSE41-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
388; SSE41-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
389; SSE41-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
390; SSE41-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
391; SSE41-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
392; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
393; SSE41-NEXT:    retq
394  %ins0  = insertelement <16 x i8> undef,  i8 %a0,  i32 0
395  %ins1  = insertelement <16 x i8> %ins0,  i8 %a1,  i32 1
396  %ins2  = insertelement <16 x i8> %ins1,  i8 %a2,  i32 2
397  %ins3  = insertelement <16 x i8> %ins2,  i8 %a3,  i32 3
398  %ins4  = insertelement <16 x i8> %ins3,  i8 %a4,  i32 4
399  %ins5  = insertelement <16 x i8> %ins4,  i8 %a5,  i32 5
400  %ins6  = insertelement <16 x i8> %ins5,  i8 %a6,  i32 6
401  %ins7  = insertelement <16 x i8> %ins6,  i8 %a7,  i32 7
402  %ins8  = insertelement <16 x i8> %ins7,  i8 %a8,  i32 8
403  %ins9  = insertelement <16 x i8> %ins8,  i8 %a9,  i32 9
404  %ins10 = insertelement <16 x i8> %ins9,  i8 %a10, i32 10
405  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
406  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
407  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
408  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
409  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
410  ret <16 x i8> %ins15
411}
412
; test_buildvector_v16i8_partial: build a <16 x i8> in which only lanes 2, 6,
; 8, 11, 12 and 15 are defined (%a2, %a6, %a8, %a11, %a12, %a15); every other
; lane is explicitly undef.
; Expected code: with SSE2, a pxor followed by one pinsrw per 16-bit word, with
; a shll $8 first for the odd byte lanes 11 and 15; with SSE4.1, a pxor
; followed by one pinsrb per defined lane.
413define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
414; SSE2-LABEL: test_buildvector_v16i8_partial:
415; SSE2:       # %bb.0:
416; SSE2-NEXT:    pxor %xmm0, %xmm0
417; SSE2-NEXT:    pinsrw $1, %edi, %xmm0
418; SSE2-NEXT:    pinsrw $3, %esi, %xmm0
419; SSE2-NEXT:    pinsrw $4, %edx, %xmm0
420; SSE2-NEXT:    shll $8, %ecx
421; SSE2-NEXT:    pinsrw $5, %ecx, %xmm0
422; SSE2-NEXT:    pinsrw $6, %r8d, %xmm0
423; SSE2-NEXT:    shll $8, %r9d
424; SSE2-NEXT:    pinsrw $7, %r9d, %xmm0
425; SSE2-NEXT:    retq
426;
427; SSE41-LABEL: test_buildvector_v16i8_partial:
428; SSE41:       # %bb.0:
429; SSE41-NEXT:    pxor %xmm0, %xmm0
430; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
431; SSE41-NEXT:    pinsrb $6, %esi, %xmm0
432; SSE41-NEXT:    pinsrb $8, %edx, %xmm0
433; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
434; SSE41-NEXT:    pinsrb $12, %r8d, %xmm0
435; SSE41-NEXT:    pinsrb $15, %r9d, %xmm0
436; SSE41-NEXT:    retq
437  %ins0  = insertelement <16 x i8> undef,  i8 undef, i32 0
438  %ins1  = insertelement <16 x i8> %ins0,  i8 undef, i32 1
439  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
440  %ins3  = insertelement <16 x i8> %ins2,  i8 undef, i32 3
441  %ins4  = insertelement <16 x i8> %ins3,  i8 undef, i32 4
442  %ins5  = insertelement <16 x i8> %ins4,  i8 undef, i32 5
443  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
444  %ins7  = insertelement <16 x i8> %ins6,  i8 undef, i32 7
445  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
446  %ins9  = insertelement <16 x i8> %ins8,  i8 undef, i32 9
447  %ins10 = insertelement <16 x i8> %ins9,  i8 undef, i32 10
448  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
449  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
450  %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13
451  %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14
452  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
453  ret <16 x i8> %ins15
454}
455
; test_buildvector_v16i8_register_zero: build a <16 x i8> with lanes 0, 4, 6,
; 8, 11, 12 and 15 taken from the arguments of the same index and every other
; lane set to the constant 0.
; Expected code: with SSE2, each byte is prepared in a GPR (movzbl for lanes
; 0, 4, 6, 8 and 12; shll $8 for lanes 11 and 15) and inserted as a 16-bit
; word with pinsrw, %a15 being loaded from the stack; with SSE4.1, movzbl plus
; movd for lane 0 followed by pinsrb, the last one with a stack-relative
; memory operand.
456define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
457; SSE2-LABEL: test_buildvector_v16i8_register_zero:
458; SSE2:       # %bb.0:
459; SSE2-NEXT:    movzbl %sil, %eax
460; SSE2-NEXT:    movzbl %dil, %esi
461; SSE2-NEXT:    movd %esi, %xmm0
462; SSE2-NEXT:    pinsrw $2, %eax, %xmm0
463; SSE2-NEXT:    movzbl %dl, %eax
464; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
465; SSE2-NEXT:    movzbl %cl, %eax
466; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
467; SSE2-NEXT:    shll $8, %r8d
468; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
469; SSE2-NEXT:    movzbl %r9b, %eax
470; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
471; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
472; SSE2-NEXT:    shll $8, %eax
473; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
474; SSE2-NEXT:    retq
475;
476; SSE41-LABEL: test_buildvector_v16i8_register_zero:
477; SSE41:       # %bb.0:
478; SSE41-NEXT:    movzbl %dil, %eax
479; SSE41-NEXT:    movd %eax, %xmm0
480; SSE41-NEXT:    pinsrb $4, %esi, %xmm0
481; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
482; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
483; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
484; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
485; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
486; SSE41-NEXT:    retq
487  %ins0  = insertelement <16 x i8> undef,  i8   %a0, i32 0
488  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
489  %ins2  = insertelement <16 x i8> %ins1,  i8     0, i32 2
490  %ins3  = insertelement <16 x i8> %ins2,  i8     0, i32 3
491  %ins4  = insertelement <16 x i8> %ins3,  i8   %a4, i32 4
492  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
493  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
494  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
495  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
496  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
497  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
498  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
499  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
500  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
501  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
502  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
503  ret <16 x i8> %ins15
504}
505
; test_buildvector_v16i8_register_zero_2: build a <16 x i8> with lanes 2, 3, 6,
; 8, 11, 12 and 15 taken from the arguments of the same index and every other
; lane set to the constant 0.
; Expected code: with SSE2, lanes 2 and 3 are combined in a GPR (shll $8,
; movzbl, orl) and inserted with a single pinsrw $1, and the remaining lanes
; are inserted word by word with pinsrw; with SSE4.1, a pxor followed by one
; pinsrb per non-constant lane, the last one with a stack-relative memory
; operand.
506define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
507; SSE2-LABEL: test_buildvector_v16i8_register_zero_2:
508; SSE2:       # %bb.0:
509; SSE2-NEXT:    shll $8, %esi
510; SSE2-NEXT:    movzbl %dil, %eax
511; SSE2-NEXT:    orl %esi, %eax
512; SSE2-NEXT:    pxor %xmm0, %xmm0
513; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
514; SSE2-NEXT:    movzbl %dl, %eax
515; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
516; SSE2-NEXT:    movzbl %cl, %eax
517; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
518; SSE2-NEXT:    shll $8, %r8d
519; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
520; SSE2-NEXT:    movzbl %r9b, %eax
521; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
522; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
523; SSE2-NEXT:    shll $8, %eax
524; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
525; SSE2-NEXT:    retq
526;
527; SSE41-LABEL: test_buildvector_v16i8_register_zero_2:
528; SSE41:       # %bb.0:
529; SSE41-NEXT:    pxor %xmm0, %xmm0
530; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
531; SSE41-NEXT:    pinsrb $3, %esi, %xmm0
532; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
533; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
534; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
535; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
536; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
537; SSE41-NEXT:    retq
538  %ins0  = insertelement <16 x i8> undef,  i8     0, i32 0
539  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
540  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
541  %ins3  = insertelement <16 x i8> %ins2,  i8   %a3, i32 3
542  %ins4  = insertelement <16 x i8> %ins3,  i8     0, i32 4
543  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
544  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
545  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
546  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
547  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
548  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
549  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
550  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
551  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
552  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
553  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
554  ret <16 x i8> %ins15
555}
556
557; OSS-Fuzz #5688
558; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5688
; ossfuzz5688: regression input that mixes variable-index insertelement and
; extractelement on constant vectors, an insertelement at an undef index and a
; store to an undef pointer. The expected output, shared by both llc
; configurations, is a bare retq.
559define <4 x i32> @ossfuzz5688(i32 %a0) {
560; CHECK-LABEL: ossfuzz5688:
561; CHECK:       # %bb.0:
562; CHECK-NEXT:    retq
563  %1 = insertelement <4 x i32> zeroinitializer, i32 -2147483648, i32 %a0
564  %2 = extractelement <4 x i32> %1, i32 %a0
565  %3 = extractelement <4 x i32> <i32 30, i32 53, i32 42, i32 12>, i32 %2
566  %4 = extractelement <4 x i32> zeroinitializer, i32 %2
567  %5 = insertelement <4 x i32> undef, i32 %3, i32 undef
568  store i32 %4, i32* undef
569  ret <4 x i32> %5
570}
571