1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
6
; Converts a <3 x float> to <3 x i8> with fptoui, widens the result to four
; lanes (lane 3 undef), overwrites lane 3 with i8 -1 and stores the <4 x i8>
; to %out. The SSE4.1 and AVX runs expect one byte shuffle that gathers the
; three converted bytes, followed by a byte insert of 255 into lane 3.
7define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
8; SSE2-LABEL: foo:
9; SSE2:       # %bb.0:
10; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
11; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
12; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
13; SSE2-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
14; SSE2-NEXT:    shll $8, %ecx
15; SSE2-NEXT:    orl %eax, %ecx
16; SSE2-NEXT:    movd %ecx, %xmm0
17; SSE2-NEXT:    movl $65280, %eax # imm = 0xFF00
18; SSE2-NEXT:    orl -{{[0-9]+}}(%rsp), %eax
19; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
20; SSE2-NEXT:    movd %xmm0, (%rdi)
21; SSE2-NEXT:    retq
22;
23; SSE41-LABEL: foo:
24; SSE41:       # %bb.0:
25; SSE41-NEXT:    cvttps2dq %xmm0, %xmm0
26; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
27; SSE41-NEXT:    movl $255, %eax
28; SSE41-NEXT:    pinsrb $3, %eax, %xmm0
29; SSE41-NEXT:    movd %xmm0, (%rdi)
30; SSE41-NEXT:    retq
31;
32; AVX-LABEL: foo:
33; AVX:       # %bb.0:
34; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
35; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
36; AVX-NEXT:    movl $255, %eax
37; AVX-NEXT:    vpinsrb $3, %eax, %xmm0, %xmm0
38; AVX-NEXT:    vmovd %xmm0, (%rdi)
39; AVX-NEXT:    retq
40  %t0 = fptoui <3 x float> %in to <3 x i8>
41  %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
42  %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
43  store <4 x i8> %t2, <4 x i8>* %out, align 4
44  ret void
45}
46
47; Verify that the DAGCombiner doesn't wrongly fold a build_vector into a
48; blend with a zero vector if the build_vector contains negative zero.
49
; Builds <A[0], -0.0, A[2], +0.0>. In the SSE4.1 and AVX runs lane 1 is
; expected to come from a memory operand, while lane 3 (positive zero) is
; expected to be produced by the zeroing form of the insert.
50define <4 x float> @test_negative_zero_1(<4 x float> %A) {
51; SSE2-LABEL: test_negative_zero_1:
52; SSE2:       # %bb.0: # %entry
53; SSE2-NEXT:    movaps %xmm0, %xmm1
54; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
55; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
56; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
57; SSE2-NEXT:    xorps %xmm2, %xmm2
58; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
59; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
60; SSE2-NEXT:    retq
61;
62; SSE41-LABEL: test_negative_zero_1:
63; SSE41:       # %bb.0: # %entry
64; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
65; SSE41-NEXT:    retq
66;
67; AVX-LABEL: test_negative_zero_1:
68; AVX:       # %bb.0: # %entry
69; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2],zero
70; AVX-NEXT:    retq
71entry:
72  %0 = extractelement <4 x float> %A, i32 0
73  %1 = insertelement <4 x float> undef, float %0, i32 0
74  %2 = insertelement <4 x float> %1, float -0.0, i32 1
75  %3 = extractelement <4 x float> %A, i32 2
76  %4 = insertelement <4 x float> %2, float %3, i32 2
77  %5 = insertelement <4 x float> %4, float 0.0, i32 3
78  ret <4 x float> %5
79}
80
81; FIXME: This could be 'movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]'.
82
; Builds <A[0], -0.0> as a <2 x double>. Every run is expected to keep the
; low element of %A and take the high element from a memory operand.
83define <2 x double> @test_negative_zero_2(<2 x double> %A) {
84; SSE2-LABEL: test_negative_zero_2:
85; SSE2:       # %bb.0: # %entry
86; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[0],mem[1]
87; SSE2-NEXT:    retq
88;
89; SSE41-LABEL: test_negative_zero_2:
90; SSE41:       # %bb.0: # %entry
91; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
92; SSE41-NEXT:    retq
93;
94; AVX-LABEL: test_negative_zero_2:
95; AVX:       # %bb.0: # %entry
96; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
97; AVX-NEXT:    retq
98entry:
99  %0 = extractelement <2 x double> %A, i32 0
100  %1 = insertelement <2 x double> undef, double %0, i32 0
101  %2 = insertelement <2 x double> %1, double -0.0, i32 1
102  ret <2 x double> %2
103}
104
; Assembles a <4 x float> from four scalar float arguments, one per lane in
; argument order. The SSE2 run expects an unpack/movlhps tree; the SSE4.1
; and AVX runs expect a chain of three insertps instructions.
105define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
106; SSE2-LABEL: test_buildvector_v4f32_register:
107; SSE2:       # %bb.0:
108; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
109; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
110; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
111; SSE2-NEXT:    retq
112;
113; SSE41-LABEL: test_buildvector_v4f32_register:
114; SSE41:       # %bb.0:
115; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
116; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
117; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
118; SSE41-NEXT:    retq
119;
120; AVX-LABEL: test_buildvector_v4f32_register:
121; AVX:       # %bb.0:
122; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
123; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
124; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
125; AVX-NEXT:    retq
126  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
127  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
128  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
129  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
130  ret <4 x float> %ins3
131}
132
; Assembles a <4 x float> from four floats, each loaded (align 4) through a
; separate pointer argument. The SSE4.1 and AVX runs expect the loads of
; lanes 1-3 to be folded into the insertps memory operands.
133define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %p2, float* %p3) {
134; SSE2-LABEL: test_buildvector_v4f32_load:
135; SSE2:       # %bb.0:
136; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
137; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
138; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
139; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
140; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
141; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
142; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
143; SSE2-NEXT:    retq
144;
145; SSE41-LABEL: test_buildvector_v4f32_load:
146; SSE41:       # %bb.0:
147; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
148; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
149; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
150; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
151; SSE41-NEXT:    retq
152;
153; AVX-LABEL: test_buildvector_v4f32_load:
154; AVX:       # %bb.0:
155; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
156; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
157; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
158; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
159; AVX-NEXT:    retq
160  %f0 = load float, float* %p0, align 4
161  %f1 = load float, float* %p1, align 4
162  %f2 = load float, float* %p2, align 4
163  %f3 = load float, float* %p3, align 4
164  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
165  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
166  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
167  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
168  ret <4 x float> %ins3
169}
170
; Mixed case: lanes 0-2 come from scalar float arguments and lane 3 is
; loaded (align 4) through %p3. The SSE4.1 and AVX runs expect only the
; final insertps to use a memory operand.
171define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
172; SSE2-LABEL: test_buildvector_v4f32_partial_load:
173; SSE2:       # %bb.0:
174; SSE2-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
175; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
176; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
177; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
178; SSE2-NEXT:    retq
179;
180; SSE41-LABEL: test_buildvector_v4f32_partial_load:
181; SSE41:       # %bb.0:
182; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
183; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
184; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
185; SSE41-NEXT:    retq
186;
187; AVX-LABEL: test_buildvector_v4f32_partial_load:
188; AVX:       # %bb.0:
189; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
190; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
191; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
192; AVX-NEXT:    retq
193  %f3 = load float, float* %p3, align 4
194  %ins0 = insertelement <4 x float> undef, float %f0, i32 0
195  %ins1 = insertelement <4 x float> %ins0, float %f1, i32 1
196  %ins2 = insertelement <4 x float> %ins1, float %f2, i32 2
197  %ins3 = insertelement <4 x float> %ins2, float %f3, i32 3
198  ret <4 x float> %ins3
199}
200
; Assembles a <4 x i32> from four i32 arguments, one per lane in argument
; order. The SSE2 run expects a movd/punpck tree; the SSE4.1 and AVX runs
; expect a movd of lane 0 followed by three pinsrd instructions.
201define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
202; SSE2-LABEL: test_buildvector_v4i32_register:
203; SSE2:       # %bb.0:
204; SSE2-NEXT:    movd %ecx, %xmm0
205; SSE2-NEXT:    movd %edx, %xmm1
206; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
207; SSE2-NEXT:    movd %esi, %xmm2
208; SSE2-NEXT:    movd %edi, %xmm0
209; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
210; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
211; SSE2-NEXT:    retq
212;
213; SSE41-LABEL: test_buildvector_v4i32_register:
214; SSE41:       # %bb.0:
215; SSE41-NEXT:    movd %edi, %xmm0
216; SSE41-NEXT:    pinsrd $1, %esi, %xmm0
217; SSE41-NEXT:    pinsrd $2, %edx, %xmm0
218; SSE41-NEXT:    pinsrd $3, %ecx, %xmm0
219; SSE41-NEXT:    retq
220;
221; AVX-LABEL: test_buildvector_v4i32_register:
222; AVX:       # %bb.0:
223; AVX-NEXT:    vmovd %edi, %xmm0
224; AVX-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
225; AVX-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
226; AVX-NEXT:    vpinsrd $3, %ecx, %xmm0, %xmm0
227; AVX-NEXT:    retq
228  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
229  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
230  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
231  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
232  ret <4 x i32> %ins3
233}
234
; Partially defined <4 x i32>: lanes 0 and 3 come from arguments, lanes 1
; and 2 are undef. The SSE4.1 and AVX runs expect just a movd for lane 0 and
; one pinsrd for lane 3, with nothing emitted for the undef lanes.
235define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
236; SSE2-LABEL: test_buildvector_v4i32_partial:
237; SSE2:       # %bb.0:
238; SSE2-NEXT:    movd %edi, %xmm0
239; SSE2-NEXT:    movd %esi, %xmm1
240; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
241; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
242; SSE2-NEXT:    retq
243;
244; SSE41-LABEL: test_buildvector_v4i32_partial:
245; SSE41:       # %bb.0:
246; SSE41-NEXT:    movd %edi, %xmm0
247; SSE41-NEXT:    pinsrd $3, %esi, %xmm0
248; SSE41-NEXT:    retq
249;
250; AVX-LABEL: test_buildvector_v4i32_partial:
251; AVX:       # %bb.0:
252; AVX-NEXT:    vmovd %edi, %xmm0
253; AVX-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
254; AVX-NEXT:    retq
255  %ins0 = insertelement <4 x i32> undef, i32   %a0, i32 0
256  %ins1 = insertelement <4 x i32> %ins0, i32 undef, i32 1
257  %ins2 = insertelement <4 x i32> %ins1, i32 undef, i32 2
258  %ins3 = insertelement <4 x i32> %ins2, i32   %a3, i32 3
259  ret <4 x i32> %ins3
260}
261
; <4 x i32> with a constant zero in lane 1 and arguments in lanes 0, 2 and
; 3. The SSE2 and SSE4.1 runs share one set of expectations here. The zero
; lane is expected to fall out of movd clearing the upper elements, so no
; explicit zeroing instruction appears.
262define <4 x i32> @test_buildvector_v4i32_register_zero(i32 %a0, i32 %a2, i32 %a3) {
263; SSE-LABEL: test_buildvector_v4i32_register_zero:
264; SSE:       # %bb.0:
265; SSE-NEXT:    movd %edx, %xmm0
266; SSE-NEXT:    movd %esi, %xmm1
267; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
268; SSE-NEXT:    movd %edi, %xmm0
269; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
270; SSE-NEXT:    retq
271;
272; AVX-LABEL: test_buildvector_v4i32_register_zero:
273; AVX:       # %bb.0:
274; AVX-NEXT:    vmovd %edx, %xmm0
275; AVX-NEXT:    vmovd %esi, %xmm1
276; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
277; AVX-NEXT:    vmovd %edi, %xmm1
278; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
279; AVX-NEXT:    retq
280  %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
281  %ins1 = insertelement <4 x i32> %ins0, i32   0, i32 1
282  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
283  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
284  ret <4 x i32> %ins3
285}
286
; Variant of the zero-lane test with the constant zero in lane 0 and
; arguments in lanes 1, 2 and 3. The expected sequence ends in a shufps that
; places the movd-zeroed element in lane 0 and %a1 in lane 1.
287define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %a3) {
288; SSE-LABEL: test_buildvector_v4i32_register_zero_2:
289; SSE:       # %bb.0:
290; SSE-NEXT:    movd %edx, %xmm0
291; SSE-NEXT:    movd %esi, %xmm1
292; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
293; SSE-NEXT:    movd %edi, %xmm0
294; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1]
295; SSE-NEXT:    retq
296;
297; AVX-LABEL: test_buildvector_v4i32_register_zero_2:
298; AVX:       # %bb.0:
299; AVX-NEXT:    vmovd %edx, %xmm0
300; AVX-NEXT:    vmovd %esi, %xmm1
301; AVX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
302; AVX-NEXT:    vmovd %edi, %xmm1
303; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
304; AVX-NEXT:    retq
305  %ins0 = insertelement <4 x i32> undef, i32   0, i32 0
306  %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
307  %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
308  %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
309  ret <4 x i32> %ins3
310}
311
; Assembles an <8 x i16> from eight i16 arguments, one per lane in argument
; order. The expectations read %a0-%a5 from registers and %a6/%a7 from the
; stack. The SSE2 run expects a punpck merge tree; the SSE4.1 and AVX runs
; expect a movd followed by seven pinsrw instructions.
312define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
313; SSE2-LABEL: test_buildvector_v8i16_register:
314; SSE2:       # %bb.0:
315; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
316; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
317; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
318; SSE2-NEXT:    movd %r9d, %xmm0
319; SSE2-NEXT:    movd %r8d, %xmm2
320; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
321; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
322; SSE2-NEXT:    movd %ecx, %xmm0
323; SSE2-NEXT:    movd %edx, %xmm1
324; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
325; SSE2-NEXT:    movd %esi, %xmm3
326; SSE2-NEXT:    movd %edi, %xmm0
327; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
328; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
329; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
330; SSE2-NEXT:    retq
331;
332; SSE41-LABEL: test_buildvector_v8i16_register:
333; SSE41:       # %bb.0:
334; SSE41-NEXT:    movd %edi, %xmm0
335; SSE41-NEXT:    pinsrw $1, %esi, %xmm0
336; SSE41-NEXT:    pinsrw $2, %edx, %xmm0
337; SSE41-NEXT:    pinsrw $3, %ecx, %xmm0
338; SSE41-NEXT:    pinsrw $4, %r8d, %xmm0
339; SSE41-NEXT:    pinsrw $5, %r9d, %xmm0
340; SSE41-NEXT:    pinsrw $6, {{[0-9]+}}(%rsp), %xmm0
341; SSE41-NEXT:    pinsrw $7, {{[0-9]+}}(%rsp), %xmm0
342; SSE41-NEXT:    retq
343;
344; AVX-LABEL: test_buildvector_v8i16_register:
345; AVX:       # %bb.0:
346; AVX-NEXT:    vmovd %edi, %xmm0
347; AVX-NEXT:    vpinsrw $1, %esi, %xmm0, %xmm0
348; AVX-NEXT:    vpinsrw $2, %edx, %xmm0, %xmm0
349; AVX-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0
350; AVX-NEXT:    vpinsrw $4, %r8d, %xmm0, %xmm0
351; AVX-NEXT:    vpinsrw $5, %r9d, %xmm0, %xmm0
352; AVX-NEXT:    vpinsrw $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
353; AVX-NEXT:    vpinsrw $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
354; AVX-NEXT:    retq
355  %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
356  %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
357  %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
358  %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
359  %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
360  %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
361  %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
362  %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
363  ret <8 x i16> %ins7
364}
365
; Partially defined <8 x i16>: lanes 1, 3, 4 and 5 come from arguments and
; lanes 0, 2, 6 and 7 are undef. All runs expect a zeroed register followed
; by one pinsrw per defined lane.
366define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
367; SSE-LABEL: test_buildvector_v8i16_partial:
368; SSE:       # %bb.0:
369; SSE-NEXT:    pxor %xmm0, %xmm0
370; SSE-NEXT:    pinsrw $1, %edi, %xmm0
371; SSE-NEXT:    pinsrw $3, %esi, %xmm0
372; SSE-NEXT:    pinsrw $4, %edx, %xmm0
373; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
374; SSE-NEXT:    retq
375;
376; AVX-LABEL: test_buildvector_v8i16_partial:
377; AVX:       # %bb.0:
378; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
379; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
380; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
381; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
382; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
383; AVX-NEXT:    retq
384  %ins0 = insertelement <8 x i16> undef, i16 undef, i32 0
385  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
386  %ins2 = insertelement <8 x i16> %ins1, i16 undef, i32 2
387  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
388  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
389  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
390  %ins6 = insertelement <8 x i16> %ins5, i16 undef, i32 6
391  %ins7 = insertelement <8 x i16> %ins6, i16 undef, i32 7
392  ret <8 x i16> %ins7
393}
394
; <8 x i16> with arguments in lanes 0, 3, 4 and 5 and constant zero in lanes
; 1, 2, 6 and 7. All runs expect lane 0 to be zero-extended (movzwl) and
; moved in with movd, then one pinsrw for each of the other argument lanes.
395define <8 x i16> @test_buildvector_v8i16_register_zero(i16 %a0, i16 %a3, i16 %a4, i16 %a5) {
396; SSE-LABEL: test_buildvector_v8i16_register_zero:
397; SSE:       # %bb.0:
398; SSE-NEXT:    movzwl %di, %eax
399; SSE-NEXT:    movd %eax, %xmm0
400; SSE-NEXT:    pinsrw $3, %esi, %xmm0
401; SSE-NEXT:    pinsrw $4, %edx, %xmm0
402; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
403; SSE-NEXT:    retq
404;
405; AVX-LABEL: test_buildvector_v8i16_register_zero:
406; AVX:       # %bb.0:
407; AVX-NEXT:    movzwl %di, %eax
408; AVX-NEXT:    vmovd %eax, %xmm0
409; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
410; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
411; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
412; AVX-NEXT:    retq
413  %ins0 = insertelement <8 x i16> undef, i16   %a0, i32 0
414  %ins1 = insertelement <8 x i16> %ins0, i16     0, i32 1
415  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
416  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
417  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
418  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
419  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
420  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
421  ret <8 x i16> %ins7
422}
423
; <8 x i16> with constant zero in lanes 0, 2, 6 and 7 and arguments in lanes
; 1, 3, 4 and 5. Because lane 0 is zero rather than an argument, all runs
; expect an explicit register zeroing followed by one pinsrw per argument.
424define <8 x i16> @test_buildvector_v8i16_register_zero_2(i16 %a1, i16 %a3, i16 %a4, i16 %a5) {
425; SSE-LABEL: test_buildvector_v8i16_register_zero_2:
426; SSE:       # %bb.0:
427; SSE-NEXT:    pxor %xmm0, %xmm0
428; SSE-NEXT:    pinsrw $1, %edi, %xmm0
429; SSE-NEXT:    pinsrw $3, %esi, %xmm0
430; SSE-NEXT:    pinsrw $4, %edx, %xmm0
431; SSE-NEXT:    pinsrw $5, %ecx, %xmm0
432; SSE-NEXT:    retq
433;
434; AVX-LABEL: test_buildvector_v8i16_register_zero_2:
435; AVX:       # %bb.0:
436; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
437; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
438; AVX-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
439; AVX-NEXT:    vpinsrw $4, %edx, %xmm0, %xmm0
440; AVX-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0
441; AVX-NEXT:    retq
442  %ins0 = insertelement <8 x i16> undef, i16     0, i32 0
443  %ins1 = insertelement <8 x i16> %ins0, i16   %a1, i32 1
444  %ins2 = insertelement <8 x i16> %ins1, i16     0, i32 2
445  %ins3 = insertelement <8 x i16> %ins2, i16   %a3, i32 3
446  %ins4 = insertelement <8 x i16> %ins3, i16   %a4, i32 4
447  %ins5 = insertelement <8 x i16> %ins4, i16   %a5, i32 5
448  %ins6 = insertelement <8 x i16> %ins5, i16     0, i32 6
449  %ins7 = insertelement <8 x i16> %ins6, i16     0, i32 7
450  ret <8 x i16> %ins7
451}
452
; Assembles a <16 x i8> from sixteen i8 arguments, one per lane in argument
; order. The expectations read %a0-%a5 from registers and the remaining ten
; from the stack. The SSE2 run expects a four-level punpck merge tree
; (bytes, words, dwords, qwords); the SSE4.1 and AVX runs expect a movd
; followed by fifteen pinsrb instructions.
453define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) {
454; SSE2-LABEL: test_buildvector_v16i8_register:
455; SSE2:       # %bb.0:
456; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
457; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
458; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
459; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
460; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
461; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
462; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
463; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
464; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
465; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
466; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
467; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
468; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
469; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
470; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
471; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
472; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
473; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
474; SSE2-NEXT:    movd %r9d, %xmm0
475; SSE2-NEXT:    movd %r8d, %xmm2
476; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
477; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
478; SSE2-NEXT:    movd %ecx, %xmm0
479; SSE2-NEXT:    movd %edx, %xmm1
480; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
481; SSE2-NEXT:    movd %esi, %xmm4
482; SSE2-NEXT:    movd %edi, %xmm0
483; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
484; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
485; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
486; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
487; SSE2-NEXT:    retq
488;
489; SSE41-LABEL: test_buildvector_v16i8_register:
490; SSE41:       # %bb.0:
491; SSE41-NEXT:    movd %edi, %xmm0
492; SSE41-NEXT:    pinsrb $1, %esi, %xmm0
493; SSE41-NEXT:    pinsrb $2, %edx, %xmm0
494; SSE41-NEXT:    pinsrb $3, %ecx, %xmm0
495; SSE41-NEXT:    pinsrb $4, %r8d, %xmm0
496; SSE41-NEXT:    pinsrb $5, %r9d, %xmm0
497; SSE41-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm0
498; SSE41-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm0
499; SSE41-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm0
500; SSE41-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm0
501; SSE41-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm0
502; SSE41-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm0
503; SSE41-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm0
504; SSE41-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm0
505; SSE41-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm0
506; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
507; SSE41-NEXT:    retq
508;
509; AVX-LABEL: test_buildvector_v16i8_register:
510; AVX:       # %bb.0:
511; AVX-NEXT:    vmovd %edi, %xmm0
512; AVX-NEXT:    vpinsrb $1, %esi, %xmm0, %xmm0
513; AVX-NEXT:    vpinsrb $2, %edx, %xmm0, %xmm0
514; AVX-NEXT:    vpinsrb $3, %ecx, %xmm0, %xmm0
515; AVX-NEXT:    vpinsrb $4, %r8d, %xmm0, %xmm0
516; AVX-NEXT:    vpinsrb $5, %r9d, %xmm0, %xmm0
517; AVX-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
518; AVX-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm0, %xmm0
519; AVX-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm0, %xmm0
520; AVX-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm0, %xmm0
521; AVX-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm0, %xmm0
522; AVX-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm0, %xmm0
523; AVX-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm0, %xmm0
524; AVX-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm0, %xmm0
525; AVX-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm0, %xmm0
526; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
527; AVX-NEXT:    retq
528  %ins0  = insertelement <16 x i8> undef,  i8 %a0,  i32 0
529  %ins1  = insertelement <16 x i8> %ins0,  i8 %a1,  i32 1
530  %ins2  = insertelement <16 x i8> %ins1,  i8 %a2,  i32 2
531  %ins3  = insertelement <16 x i8> %ins2,  i8 %a3,  i32 3
532  %ins4  = insertelement <16 x i8> %ins3,  i8 %a4,  i32 4
533  %ins5  = insertelement <16 x i8> %ins4,  i8 %a5,  i32 5
534  %ins6  = insertelement <16 x i8> %ins5,  i8 %a6,  i32 6
535  %ins7  = insertelement <16 x i8> %ins6,  i8 %a7,  i32 7
536  %ins8  = insertelement <16 x i8> %ins7,  i8 %a8,  i32 8
537  %ins9  = insertelement <16 x i8> %ins8,  i8 %a9,  i32 9
538  %ins10 = insertelement <16 x i8> %ins9,  i8 %a10, i32 10
539  %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
540  %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
541  %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
542  %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
543  %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
544  ret <16 x i8> %ins15
545}
546
; Partially defined <16 x i8>: lanes 2, 6, 8, 11, 12 and 15 come from
; arguments, all other lanes are undef. The SSE2 run expects word inserts
; (pinsrw), shifting the odd-byte values (%a11, %a15) left by 8 so they land
; in the high byte of their word. The SSE4.1 and AVX runs expect one pinsrb
; per defined lane.
547define <16 x i8> @test_buildvector_v16i8_partial(i8 %a2, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
548; SSE2-LABEL: test_buildvector_v16i8_partial:
549; SSE2:       # %bb.0:
550; SSE2-NEXT:    pxor %xmm0, %xmm0
551; SSE2-NEXT:    pinsrw $1, %edi, %xmm0
552; SSE2-NEXT:    pinsrw $3, %esi, %xmm0
553; SSE2-NEXT:    pinsrw $4, %edx, %xmm0
554; SSE2-NEXT:    shll $8, %ecx
555; SSE2-NEXT:    pinsrw $5, %ecx, %xmm0
556; SSE2-NEXT:    pinsrw $6, %r8d, %xmm0
557; SSE2-NEXT:    shll $8, %r9d
558; SSE2-NEXT:    pinsrw $7, %r9d, %xmm0
559; SSE2-NEXT:    retq
560;
561; SSE41-LABEL: test_buildvector_v16i8_partial:
562; SSE41:       # %bb.0:
563; SSE41-NEXT:    pxor %xmm0, %xmm0
564; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
565; SSE41-NEXT:    pinsrb $6, %esi, %xmm0
566; SSE41-NEXT:    pinsrb $8, %edx, %xmm0
567; SSE41-NEXT:    pinsrb $11, %ecx, %xmm0
568; SSE41-NEXT:    pinsrb $12, %r8d, %xmm0
569; SSE41-NEXT:    pinsrb $15, %r9d, %xmm0
570; SSE41-NEXT:    retq
571;
572; AVX-LABEL: test_buildvector_v16i8_partial:
573; AVX:       # %bb.0:
574; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
575; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
576; AVX-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
577; AVX-NEXT:    vpinsrb $8, %edx, %xmm0, %xmm0
578; AVX-NEXT:    vpinsrb $11, %ecx, %xmm0, %xmm0
579; AVX-NEXT:    vpinsrb $12, %r8d, %xmm0, %xmm0
580; AVX-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
581; AVX-NEXT:    retq
582  %ins0  = insertelement <16 x i8> undef,  i8 undef, i32 0
583  %ins1  = insertelement <16 x i8> %ins0,  i8 undef, i32 1
584  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
585  %ins3  = insertelement <16 x i8> %ins2,  i8 undef, i32 3
586  %ins4  = insertelement <16 x i8> %ins3,  i8 undef, i32 4
587  %ins5  = insertelement <16 x i8> %ins4,  i8 undef, i32 5
588  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
589  %ins7  = insertelement <16 x i8> %ins6,  i8 undef, i32 7
590  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
591  %ins9  = insertelement <16 x i8> %ins8,  i8 undef, i32 9
592  %ins10 = insertelement <16 x i8> %ins9,  i8 undef, i32 10
593  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
594  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
595  %ins13 = insertelement <16 x i8> %ins12, i8 undef, i32 13
596  %ins14 = insertelement <16 x i8> %ins13, i8 undef, i32 14
597  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
598  ret <16 x i8> %ins15
599}
600
; <16 x i8> with arguments in lanes 0, 4, 6, 8, 11, 12 and 15 and constant
; zero in every other lane. Unlike the undef variant, the neighbouring bytes
; must really be zero, so the SSE2 run expects each even-lane value to be
; zero-extended (movzbl) before its pinsrw. The SSE4.1 and AVX runs expect
; movzbl + movd for lane 0 and one pinsrb per remaining argument, with %a15
; read from the stack.
601define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
602; SSE2-LABEL: test_buildvector_v16i8_register_zero:
603; SSE2:       # %bb.0:
604; SSE2-NEXT:    movzbl %sil, %eax
605; SSE2-NEXT:    movzbl %dil, %esi
606; SSE2-NEXT:    movd %esi, %xmm0
607; SSE2-NEXT:    pinsrw $2, %eax, %xmm0
608; SSE2-NEXT:    movzbl %dl, %eax
609; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
610; SSE2-NEXT:    movzbl %cl, %eax
611; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
612; SSE2-NEXT:    shll $8, %r8d
613; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
614; SSE2-NEXT:    movzbl %r9b, %eax
615; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
616; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
617; SSE2-NEXT:    shll $8, %eax
618; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
619; SSE2-NEXT:    retq
620;
621; SSE41-LABEL: test_buildvector_v16i8_register_zero:
622; SSE41:       # %bb.0:
623; SSE41-NEXT:    movzbl %dil, %eax
624; SSE41-NEXT:    movd %eax, %xmm0
625; SSE41-NEXT:    pinsrb $4, %esi, %xmm0
626; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
627; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
628; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
629; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
630; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
631; SSE41-NEXT:    retq
632;
633; AVX-LABEL: test_buildvector_v16i8_register_zero:
634; AVX:       # %bb.0:
635; AVX-NEXT:    movzbl %dil, %eax
636; AVX-NEXT:    vmovd %eax, %xmm0
637; AVX-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
638; AVX-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
639; AVX-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
640; AVX-NEXT:    vpinsrb $11, %r8d, %xmm0, %xmm0
641; AVX-NEXT:    vpinsrb $12, %r9d, %xmm0, %xmm0
642; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
643; AVX-NEXT:    retq
644  %ins0  = insertelement <16 x i8> undef,  i8   %a0, i32 0
645  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
646  %ins2  = insertelement <16 x i8> %ins1,  i8     0, i32 2
647  %ins3  = insertelement <16 x i8> %ins2,  i8     0, i32 3
648  %ins4  = insertelement <16 x i8> %ins3,  i8   %a4, i32 4
649  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
650  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
651  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
652  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
653  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
654  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
655  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
656  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
657  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
658  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
659  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
660  ret <16 x i8> %ins15
661}
662
; <16 x i8> with arguments in lanes 2, 3, 6, 8, 11, 12 and 15 and constant
; zero elsewhere (including lane 0). Lanes 2 and 3 share one 16-bit word, so
; the SSE2 run expects %a3 to be shifted left by 8 and OR-ed with the
; zero-extended %a2 before a single pinsrw into word 1. The SSE4.1 and AVX
; runs expect a zeroed register followed by one pinsrb per argument, with
; %a15 read from the stack.
663define <16 x i8> @test_buildvector_v16i8_register_zero_2(i8 %a2, i8 %a3, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) {
664; SSE2-LABEL: test_buildvector_v16i8_register_zero_2:
665; SSE2:       # %bb.0:
666; SSE2-NEXT:    shll $8, %esi
667; SSE2-NEXT:    movzbl %dil, %eax
668; SSE2-NEXT:    orl %esi, %eax
669; SSE2-NEXT:    pxor %xmm0, %xmm0
670; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
671; SSE2-NEXT:    movzbl %dl, %eax
672; SSE2-NEXT:    pinsrw $3, %eax, %xmm0
673; SSE2-NEXT:    movzbl %cl, %eax
674; SSE2-NEXT:    pinsrw $4, %eax, %xmm0
675; SSE2-NEXT:    shll $8, %r8d
676; SSE2-NEXT:    pinsrw $5, %r8d, %xmm0
677; SSE2-NEXT:    movzbl %r9b, %eax
678; SSE2-NEXT:    pinsrw $6, %eax, %xmm0
679; SSE2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
680; SSE2-NEXT:    shll $8, %eax
681; SSE2-NEXT:    pinsrw $7, %eax, %xmm0
682; SSE2-NEXT:    retq
683;
684; SSE41-LABEL: test_buildvector_v16i8_register_zero_2:
685; SSE41:       # %bb.0:
686; SSE41-NEXT:    pxor %xmm0, %xmm0
687; SSE41-NEXT:    pinsrb $2, %edi, %xmm0
688; SSE41-NEXT:    pinsrb $3, %esi, %xmm0
689; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
690; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0
691; SSE41-NEXT:    pinsrb $11, %r8d, %xmm0
692; SSE41-NEXT:    pinsrb $12, %r9d, %xmm0
693; SSE41-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm0
694; SSE41-NEXT:    retq
695;
696; AVX-LABEL: test_buildvector_v16i8_register_zero_2:
697; AVX:       # %bb.0:
698; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
699; AVX-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
700; AVX-NEXT:    vpinsrb $3, %esi, %xmm0, %xmm0
701; AVX-NEXT:    vpinsrb $6, %edx, %xmm0, %xmm0
702; AVX-NEXT:    vpinsrb $8, %ecx, %xmm0, %xmm0
703; AVX-NEXT:    vpinsrb $11, %r8d, %xmm0, %xmm0
704; AVX-NEXT:    vpinsrb $12, %r9d, %xmm0, %xmm0
705; AVX-NEXT:    vpinsrb $15, {{[0-9]+}}(%rsp), %xmm0, %xmm0
706; AVX-NEXT:    retq
707  %ins0  = insertelement <16 x i8> undef,  i8     0, i32 0
708  %ins1  = insertelement <16 x i8> %ins0,  i8     0, i32 1
709  %ins2  = insertelement <16 x i8> %ins1,  i8   %a2, i32 2
710  %ins3  = insertelement <16 x i8> %ins2,  i8   %a3, i32 3
711  %ins4  = insertelement <16 x i8> %ins3,  i8     0, i32 4
712  %ins5  = insertelement <16 x i8> %ins4,  i8     0, i32 5
713  %ins6  = insertelement <16 x i8> %ins5,  i8   %a6, i32 6
714  %ins7  = insertelement <16 x i8> %ins6,  i8     0, i32 7
715  %ins8  = insertelement <16 x i8> %ins7,  i8   %a8, i32 8
716  %ins9  = insertelement <16 x i8> %ins8,  i8     0, i32 9
717  %ins10 = insertelement <16 x i8> %ins9,  i8     0, i32 10
718  %ins11 = insertelement <16 x i8> %ins10, i8  %a11, i32 11
719  %ins12 = insertelement <16 x i8> %ins11, i8  %a12, i32 12
720  %ins13 = insertelement <16 x i8> %ins12, i8     0, i32 13
721  %ins14 = insertelement <16 x i8> %ins13, i8     0, i32 14
722  %ins15 = insertelement <16 x i8> %ins14, i8  %a15, i32 15
723  ret <16 x i8> %ins15
724}
725
; PR46461 - Don't let reduceBuildVecExtToExtBuildVec break splat(zero_extend) patterns,
; resulting in the BUILD_VECTOR lowering to individual insertions into a zero vector.
;
; The IR shifts %x right by one, zero-extends the result to i32, splats it
; across all 16 lanes (insertelement into lane 0 + shufflevector with an
; all-zero mask) and stores the vector to %y. The checks pin a single
; scalar-to-vector move followed by one splat (pshufd / vpshufd+vinsertf128 /
; vpbroadcastd) and plain vector stores, i.e. no per-element insertions.

define void @PR46461(i16 %x, <16 x i32>* %y) {
; SSE-LABEL: PR46461:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl %di, %eax
; SSE-NEXT:    shrl %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, 48(%rsi)
; SSE-NEXT:    movdqa %xmm0, 32(%rsi)
; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
; SSE-NEXT:    movdqa %xmm0, (%rsi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR46461:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl %di, %eax
; AVX1-NEXT:    shrl %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps %ymm0, 32(%rsi)
; AVX1-NEXT:    vmovaps %ymm0, (%rsi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR46461:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movzwl %di, %eax
; AVX2-NEXT:    shrl %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa %ymm0, 32(%rsi)
; AVX2-NEXT:    vmovdqa %ymm0, (%rsi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %z = lshr i16 %x, 1
  %a = zext i16 %z to i32
  %b = insertelement <16 x i32> undef, i32 %a, i32 0
  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %c, <16 x i32>* %y
  ret void
}
771
; OSS-Fuzz #5688
; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5688
;
; Fuzzer-derived input (presumably a crash reproducer - confirm against the
; linked issue). Every vector index is either the variable %a0, a value derived
; from it, or undef: INT_MIN is inserted into a zero vector at index %a0 and
; read back at the same index, that value then indexes two constant vectors,
; the returned vector is built by inserting at an undef index, and the store
; targets an undef pointer. The shared CHECK prefix expects the whole body to
; reduce to a bare retq on every RUN configuration.
define <4 x i32> @ossfuzz5688(i32 %a0) {
; CHECK-LABEL: ossfuzz5688:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = insertelement <4 x i32> zeroinitializer, i32 -2147483648, i32 %a0
  %2 = extractelement <4 x i32> %1, i32 %a0
  %3 = extractelement <4 x i32> <i32 30, i32 53, i32 42, i32 12>, i32 %2
  %4 = extractelement <4 x i32> zeroinitializer, i32 %2
  %5 = insertelement <4 x i32> undef, i32 %3, i32 undef
  store i32 %4, i32* undef
  ret <4 x i32> %5
}
786
; If we do not define all bytes that are extracted, this is a miscompile.
;
; Only lanes 0 and 3 of the <4 x i8> are written (from p[0] and p[3]); lanes 1
; and 2 stay undef. The vector is zero-extended to <4 x i32>, reduced modulo %v
; with urem, and lane 3 is returned - so the byte loaded from p[3] must reach
; the dividend. The SSE2 checks load it explicitly (movzbl 3(%rdi)) into a
; zeroed register; the SSE4.1/AVX checks zero-extend four bytes from memory
; with (v)pmovzxbd and extract dword 3.

define i32 @PR46586(i8* %p, <4 x i32> %v) {
; SSE2-LABEL: PR46586:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl 3(%rdi), %eax
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pinsrw $6, %eax, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT:    movd %xmm0, %ecx
; SSE2-NEXT:    xorl %edx, %edx
; SSE2-NEXT:    divl %ecx
; SSE2-NEXT:    movl %edx, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: PR46586:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT:    extractps $3, %xmm0, %ecx
; SSE41-NEXT:    pextrd $3, %xmm1, %eax
; SSE41-NEXT:    xorl %edx, %edx
; SSE41-NEXT:    divl %ecx
; SSE41-NEXT:    movl %edx, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR46586:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT:    vextractps $3, %xmm0, %ecx
; AVX-NEXT:    vpextrd $3, %xmm1, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divl %ecx
; AVX-NEXT:    movl %edx, %eax
; AVX-NEXT:    retq
  %p0 = getelementptr inbounds i8, i8* %p, i64 0
  %p3 = getelementptr inbounds i8, i8* %p, i64 3
  %t25 = load i8, i8* %p0
  %t28 = load i8, i8* %p3
  %t29 = insertelement <4 x i8> undef, i8 %t25, i32 0
  %t32 = insertelement <4 x i8> %t29, i8 %t28, i32 3
  %t33 = zext <4 x i8> %t32 to <4 x i32>
  %t34 = urem <4 x i32> %t33, %v
  %t35 = extractelement <4 x i32> %t34, i32 3
  ret i32 %t35
}
834