; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; Each test masks (or clamps, via min/max intrinsics) vector elements so that
; SelectionDAG's known-bits analysis can see through the following
; shuffle/extract/convert and simplify the codegen - in the extreme cases the
; whole function folds to a single vxorps (all result bits known zero).
; NOTE(review): the X32/X64 CHECK lines are generated; rerun
; utils/update_llc_test_checks.py rather than editing them by hand.

define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpextrw $0, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpextrw $0, %xmm0, %eax
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: fildll {{[0-9]+}}(%esp)
; X32-NEXT: fstps {{[0-9]+}}(%esp)
; X32-NEXT: flds {{[0-9]+}}(%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64: # BB#0:
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
; X64-NEXT: retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32: # BB#0:
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64: # BB#0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: movzwl %si, %ecx
; X64-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; X64-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT: vpmovsxwd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT: vpmovsxwd %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpxor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

; The remaining "mask_*_shuffle_*" tests shift/mask so every result bit is
; known zero, so codegen should collapse to zeroing the result register.

define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32: # BB#0:
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: vcvtdq2ps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpsrlq $1, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x float> @knownbits_umax_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vpsrld $16, %xmm0, %xmm0
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_umax_umin_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vpsrld $16, %xmm0, %xmm0
; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT: retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32: # BB#0:
; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64: # BB#0:
; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}