; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void @t1(i32 %a, ptr %P) nounwind {
; X32-LABEL: t1:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shll $12, %ecx
; X32-NEXT:    movd %ecx, %xmm0
; X32-NEXT:    psllq $32, %xmm0
; X32-NEXT:    movq %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, ptr %P
  ret void
}

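; @t2-@t4 shuffle a naturally (16-byte) aligned <4 x float> load with zero; the
; load can be folded directly into the resulting shuffle instructions.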
define <4 x float> @t2(ptr %P) nounwind {
; X32-LABEL: t2:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X32-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t3(ptr %P) nounwind {
; X32-LABEL: t3:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4(ptr %P) nounwind {
; X32-LABEL: t4:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

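; Same shuffle as @t4, but the load is only 4-byte aligned; it stays a separate
; movups unless sse-unaligned-mem allows folding the unaligned memory operand.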
define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X32-LABEL: t4_under_aligned:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movups (%eax), %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X32-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

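; @t5 and @t6 leave enough lanes undef (and lane 15 zero in @t5) that the byte
; shuffle can be matched as a psrlw $8.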
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X32-LABEL: t5:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X32-LABEL: t6:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

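; @t7 wants %x[1,2] in the top two bytes, which a 13-byte pslldq provides.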
define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X32-LABEL: t7:
; X32:       # %bb.0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X32-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

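; @t8 and @t9 are matched as a one-byte psrldq: every defined lane reads
; %x[i+1], and the top lane is zero or undef.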
define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X32-LABEL: t8:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X32-LABEL: t9:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
  ret <16 x i8> %s
}