; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

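; Masking element 0 with 15 clears its sign bit, so the sext of the extracted i16 folds to a plain 'andl $15'.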
define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # %bb.0:
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    andl $15, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %xmm0, %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

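; Element 0 is masked down to 16 bits, so the i64 uitofp can be lowered with the signed vcvtdq2ps.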
define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

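; The shuffle only reads the two inserted lanes, both zero-extended from i16, so the uitofp lowers to the signed vcvtdq2ps.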
define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32>  %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

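; The shuffled upper lanes are masked to 4 bits, so the sext is really a zext and lowers to an unpack with zero.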
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

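; Same fold as above with the extraction split across two shuffles; the known bits survive both.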
define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

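; The undef index in the first shuffle defeats the fold, so the sign extension is kept as vpmovsxwd.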
define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

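; Lanes 2 and 3 are masked to small non-negative values, so the uitofp of the shuffle lowers to the signed vcvtdq2ps.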
define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

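; OR'ing the masked lanes with 65535 makes lanes 2 and 3 exactly 65535, so the result constant-folds to a splat of 6.5535E+4.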
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4]
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

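; The xor cannot set any of the known-zero high bits in lanes 2 and 3, so the signed vcvtdq2ps remains safe.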
define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

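; Lanes 0 and 3 have their low 16 bits cleared, so the shl by 17 shifts every set bit out and the result folds to zero.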
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

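; Lanes 0 and 3 fit in 17 bits, so the ashr by 15 leaves at most 2 bits and the lshr by 30 folds to zero.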
define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

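; Lanes 0 and 3 are multiples of 65536, so the products keep 16 trailing zero bits and the shl by 22 folds to zero.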
define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

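; The truncated lanes 0 and 3 still have 16 trailing zero bits, so the shl by 22 folds to zero.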
define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

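; Lanes 0 and 3 of both addends fit in 15 bits, so the sum fits in 16 bits and the lshr by 17 folds to zero.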
define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

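; Subtracting a 4-bit value from 255 still fits in 8 bits, so the lshr by 22 folds to zero.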
define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

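; The udiv can only shrink the 15-bit lanes 0 and 3, so the lshr by 22 folds to zero.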
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

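; The urem by 16 leaves at most 4 bits, so the lshr by 22 folds to zero.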
define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

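; The remainder in lanes 0 and 3 is bounded by the 15-bit divisor, so the lshr by 22 folds to zero.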
define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

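; Lanes 0 and 3 are multiples of 32768 and hence of 16, so the srem is exactly zero.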
define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

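; The bswap moves the 15-bit lanes 0 and 3 into the high bytes, leaving 16 trailing zero bits, so the shl by 22 folds to zero.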
define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

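; Every lane selected by the concatenating shuffle is masked to 17 bits, so the 256-bit uitofp lowers to the signed vcvtdq2ps.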
define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

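; The i64 lshr clears the top bit of each high 32-bit half and the shuffle only reads those halves, so vcvtdq2ps is safe.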
define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

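; The smin/smax clamps leave lanes 0 and 3 known non-negative, so the uitofp of the shuffle lowers to the signed vcvtdq2ps.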
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

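; The umin bounds lanes 0 and 3 below 2^31, so the uitofp of the shuffle lowers to the signed vcvtdq2ps.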
define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

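; The umax with -1 forces lanes 1 and 2 to all-ones, so the shuffle plus ashr by 31 folds to an all-ones vector.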
define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

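; Both the mask and the umax constants bound lanes 0 and 3 well below 2^31, so the signed vcvtdq2ps is used.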
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

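; Clearing bit 0 before the bitreverse clears the sign bit afterwards, so the ashr by 31 folds to zero.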
define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN, we can't combine to sitofp.
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

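; OR'ing lanes 0 and 2 with a non-zero constant rules out INT_MIN, so the abs result is known non-negative and vcvtdq2ps is safe.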
define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_or_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

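; Both select operands are masked in the lanes the shuffle reads, so the result is known non-negative and vcvtdq2ps is used.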
define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

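; One select operand is shifted right by 5 and the other masked, so the shuffled lanes are known non-negative and vcvtdq2ps is used.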
define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vpsrld $5, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

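; The lshr clears the sign bits of the two lanes the subvector reads, so the uitofp lowers to the signed vcvtdq2pd.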
define <2 x double> @knownbits_lshr_subvector_uitofp(<4 x i32> %x) {
; X32-LABEL: knownbits_lshr_subvector_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrld $2, %xmm0, %xmm1
; X32-NEXT:    vpsrld $1, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_subvector_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %xmm0, %xmm1
; X64-NEXT:    vpsrld $1, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %x, <i32 1, i32 2, i32 0, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = uitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}