; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Element 0 is masked with 15, clearing its sign bit, so the sext of the
; extracted i16 needs no explicit sign-extension instruction in the output.
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

; The extracted i64 is masked to 16 bits, so it is known non-negative and the
; uitofp can be lowered via a signed conversion (fildll / vcvtsi2ssq) after the
; upper lanes are blended to zero.
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    fildll {{[0-9]+}}(%esp)
; X32-NEXT:    fstps {{[0-9]+}}(%esp)
; X32-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

; The inserted elements are zero-extended i16s and the final shuffle only keeps
; those lanes, so every lane fits in 16 bits and the uitofp lowers to a plain
; signed vcvtdq2ps.
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # BB#0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vpinsrd $0, %eax, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # BB#0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vpinsrd $0, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32>  %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

; The upper four lanes are masked with 15, so their sign bit is known zero and
; the sign-extension lowers to an unpack against zero instead of vpmovsxwd.
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Same as knownbits_mask_shuffle_sext but through a chain of two shuffles; the
; known-zero sign bits still survive, so the sext again lowers to a zero-unpack.
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; As above, but one shuffle index is undef; the sext-to-zext fold does not
; happen here and codegen keeps an explicit vpmovsxwd.
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

; Lanes 2 and 3 are masked to small non-negative values and the shuffle only
; uses those lanes, so the uitofp lowers to the signed vcvtdq2ps.
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

; Known bits propagate through the intervening 'or': the used lanes stay within
; 16 bits, so the uitofp still lowers to vcvtdq2ps.
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; Known bits propagate through the intervening 'xor': the used lanes stay
; within 16 bits, so the uitofp still lowers to vcvtdq2ps.
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; The shuffled lanes have their low 16 bits masked to zero, so shifting left
; by 17 discards every remaining bit - the whole function folds to zero.
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

; The shuffled lanes fit in 17 bits, so after the ashr by 15 at most two bits
; remain and the lshr by 30 clears them - the result folds to zero.
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

; The shuffled lanes are multiples of 2^16, multiplication preserves those
; trailing zeros, and the shl by 22 shifts every set bit out - result is zero.
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Known bits survive the i64->i32 truncation: the shuffled lanes have zero low
; 16 bits, so the shl by 22 leaves nothing and the result folds to zero.
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; Both addends' shuffled lanes are at most 15 bits, so their sum fits in 16
; bits and the lshr by 17 clears everything - the result folds to zero.
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

; 255 minus a 4-bit value stays within 8 bits, so the lshr by 22 on the
; shuffled lanes clears every bit - the result folds to zero.
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; An unsigned divide cannot enlarge its dividend: the shuffled lanes stay
; within 15 bits, so the lshr by 22 folds the result to zero.
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; urem by 16 leaves at most 4 bits, so the lshr by 22 clears them all and the
; function folds to zero.
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

; The remainder of a 15-bit dividend fits in 15 bits, so the lshr by 22 on the
; shuffled lanes clears every bit - the result folds to zero.
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

; The shuffled lanes are masked to multiples of 32768 (hence of 16), so the
; srem by 16 is exactly zero and the whole function folds to zero.
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

; The masked lanes fit in 15 bits, so after bswap all set bits sit in the
; upper two bytes and the shl by 22 shifts them out - the result is zero.
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # BB#0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # BB#0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; Every lane selected by the concatenating shuffle is masked to at most 17
; bits (known non-negative), so the <8 x i32> uitofp lowers to vcvtdq2ps.
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

; The i64 lshr by 1 clears bit 63, so the odd i32 lanes (the high halves)
; picked by the shuffle are known non-negative and uitofp lowers to vcvtdq2ps.
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; Known bits are not tracked through the pminsd/pmaxsd intrinsics here, so the
; uitofp is still expanded via the blend/shift/add sequence rather than
; folding to a plain vcvtdq2ps.
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

; Likewise, known bits are not tracked through the pmaxud/pminud intrinsics,
; so the uitofp keeps the full blend/shift/add expansion.
define <4 x float> @knownbits_umax_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

; The leading 'and' bounds every shuffled lane to 18 bits, so even with the
; intervening pmaxud the uitofp lowers to a plain vcvtdq2ps.
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # BB#0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # BB#0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
