1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW 8 9; PR28925 10 11define <4 x i32> @test1(<4 x i1> %cond, <4 x i32> %x) { 12; SSE-LABEL: test1: 13; SSE: # %bb.0: 14; SSE-NEXT: pslld $31, %xmm0 15; SSE-NEXT: psrad $31, %xmm0 16; SSE-NEXT: pandn %xmm1, %xmm0 17; SSE-NEXT: retq 18; 19; AVX-LABEL: test1: 20; AVX: # %bb.0: 21; AVX-NEXT: vpslld $31, %xmm0, %xmm0 22; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 23; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 24; AVX-NEXT: retq 25; 26; AVX512F-LABEL: test1: 27; AVX512F: # %bb.0: 28; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 29; AVX512F-NEXT: vptestnmd %xmm0, %xmm0, %k1 30; AVX512F-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} 31; AVX512F-NEXT: retq 32; 33; AVX512DQBW-LABEL: test1: 34; AVX512DQBW: # %bb.0: 35; AVX512DQBW-NEXT: vpslld $31, %xmm0, %xmm0 36; AVX512DQBW-NEXT: vpmovd2m %xmm0, %k0 37; AVX512DQBW-NEXT: knotw %k0, %k1 38; AVX512DQBW-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} 39; AVX512DQBW-NEXT: retq 40 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x 41 ret <4 x i32> %r 42} 43 44define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %x) { 45; SSE-LABEL: test2: 46; SSE: # %bb.0: 47; SSE-NEXT: cmpneqps %xmm1, %xmm0 48; SSE-NEXT: andps %xmm2, %xmm0 49; SSE-NEXT: retq 50; 51; AVX-LABEL: test2: 52; AVX: # %bb.0: 53; 
AVX-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 54; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 55; AVX-NEXT: retq 56; 57; AVX512-LABEL: test2: 58; AVX512: # %bb.0: 59; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1 60; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z} 61; AVX512-NEXT: retq 62 %cond = fcmp oeq <4 x float> %a, %b 63 %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x 64 ret <4 x i32> %r 65} 66 67define float @fsel_zero_false_val(float %a, float %b, float %x) { 68; SSE-LABEL: fsel_zero_false_val: 69; SSE: # %bb.0: 70; SSE-NEXT: cmpeqss %xmm1, %xmm0 71; SSE-NEXT: andps %xmm2, %xmm0 72; SSE-NEXT: retq 73; 74; AVX-LABEL: fsel_zero_false_val: 75; AVX: # %bb.0: 76; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 77; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 78; AVX-NEXT: retq 79; 80; AVX512-LABEL: fsel_zero_false_val: 81; AVX512: # %bb.0: 82; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 83; AVX512-NEXT: vmovss %xmm2, %xmm2, %xmm0 {%k1} {z} 84; AVX512-NEXT: retq 85 %cond = fcmp oeq float %a, %b 86 %r = select i1 %cond, float %x, float 0.0 87 ret float %r 88} 89 90define float @fsel_zero_true_val(float %a, float %b, float %x) { 91; SSE-LABEL: fsel_zero_true_val: 92; SSE: # %bb.0: 93; SSE-NEXT: cmpeqss %xmm1, %xmm0 94; SSE-NEXT: andnps %xmm2, %xmm0 95; SSE-NEXT: retq 96; 97; AVX-LABEL: fsel_zero_true_val: 98; AVX: # %bb.0: 99; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 100; AVX-NEXT: vandnps %xmm2, %xmm0, %xmm0 101; AVX-NEXT: retq 102; 103; AVX512-LABEL: fsel_zero_true_val: 104; AVX512: # %bb.0: 105; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 106; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 107; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} 108; AVX512-NEXT: vmovaps %xmm2, %xmm0 109; AVX512-NEXT: retq 110 %cond = fcmp oeq float %a, %b 111 %r = select i1 %cond, float 0.0, float %x 112 ret float %r 113} 114 115define double @fsel_nonzero_false_val(double %x, double %y, double %z) { 116; SSE-LABEL: fsel_nonzero_false_val: 117; SSE: # %bb.0: 118; SSE-NEXT: cmpeqsd %xmm1, %xmm0 119; SSE-NEXT: andpd 
%xmm0, %xmm2 120; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 121; SSE-NEXT: andnpd %xmm1, %xmm0 122; SSE-NEXT: orpd %xmm2, %xmm0 123; SSE-NEXT: retq 124; 125; AVX-LABEL: fsel_nonzero_false_val: 126; AVX: # %bb.0: 127; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 128; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1] 129; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 130; AVX-NEXT: retq 131; 132; AVX512-LABEL: fsel_nonzero_false_val: 133; AVX512: # %bb.0: 134; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 135; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 136; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} 137; AVX512-NEXT: retq 138 %cond = fcmp oeq double %x, %y 139 %r = select i1 %cond, double %z, double 42.0 140 ret double %r 141} 142 143define double @fsel_nonzero_true_val(double %x, double %y, double %z) { 144; SSE-LABEL: fsel_nonzero_true_val: 145; SSE: # %bb.0: 146; SSE-NEXT: cmpeqsd %xmm1, %xmm0 147; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 148; SSE-NEXT: andpd %xmm0, %xmm1 149; SSE-NEXT: andnpd %xmm2, %xmm0 150; SSE-NEXT: orpd %xmm1, %xmm0 151; SSE-NEXT: retq 152; 153; AVX-LABEL: fsel_nonzero_true_val: 154; AVX: # %bb.0: 155; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 156; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0 157; AVX-NEXT: retq 158; 159; AVX512-LABEL: fsel_nonzero_true_val: 160; AVX512: # %bb.0: 161; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 162; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 {%k1} 163; AVX512-NEXT: vmovapd %xmm2, %xmm0 164; AVX512-NEXT: retq 165 %cond = fcmp oeq double %x, %y 166 %r = select i1 %cond, double 42.0, double %z 167 ret double %r 168} 169 170define double @fsel_nonzero_constants(double %x, double %y) { 171; SSE-LABEL: fsel_nonzero_constants: 172; SSE: # %bb.0: 173; SSE-NEXT: cmpeqsd %xmm1, %xmm0 174; SSE-NEXT: movq %xmm0, %rax 175; SSE-NEXT: andl $1, %eax 176; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 177; SSE-NEXT: retq 178; 179; AVX-LABEL: fsel_nonzero_constants: 180; AVX: # %bb.0: 181; 
AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 182; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1] 183; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 184; AVX-NEXT: retq 185; 186; AVX512-LABEL: fsel_nonzero_constants: 187; AVX512: # %bb.0: 188; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 189; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 190; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} 191; AVX512-NEXT: retq 192 %cond = fcmp oeq double %x, %y 193 %r = select i1 %cond, double 12.0, double 42.0 194 ret double %r 195} 196 197define <2 x double> @vsel_nonzero_constants(<2 x double> %x, <2 x double> %y) { 198; SSE2-LABEL: vsel_nonzero_constants: 199; SSE2: # %bb.0: 200; SSE2-NEXT: cmplepd %xmm0, %xmm1 201; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 202; SSE2-NEXT: movapd %xmm1, %xmm0 203; SSE2-NEXT: andnpd %xmm2, %xmm0 204; SSE2-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 205; SSE2-NEXT: orpd %xmm1, %xmm0 206; SSE2-NEXT: retq 207; 208; SSE42-LABEL: vsel_nonzero_constants: 209; SSE42: # %bb.0: 210; SSE42-NEXT: cmplepd %xmm0, %xmm1 211; SSE42-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 212; SSE42-NEXT: movapd %xmm1, %xmm0 213; SSE42-NEXT: blendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 214; SSE42-NEXT: movapd %xmm2, %xmm0 215; SSE42-NEXT: retq 216; 217; AVX-LABEL: vsel_nonzero_constants: 218; AVX: # %bb.0: 219; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 220; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 221; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 222; AVX-NEXT: retq 223; 224; AVX512-LABEL: vsel_nonzero_constants: 225; AVX512: # %bb.0: 226; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1 227; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 228; AVX512-NEXT: vmovapd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} 229; AVX512-NEXT: retq 230 %cond = fcmp oge <2 x double> %x, %y 231 %r = select <2 x i1> %cond, <2 x double> <double 12.0, double -1.0>, <2 x double> <double 42.0, double 0.0> 232 ret <2 x double> %r 
233} 234 235define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { 236; SSE-LABEL: signbit_mask_v16i8: 237; SSE: # %bb.0: 238; SSE-NEXT: pxor %xmm2, %xmm2 239; SSE-NEXT: pcmpgtb %xmm0, %xmm2 240; SSE-NEXT: pand %xmm1, %xmm2 241; SSE-NEXT: movdqa %xmm2, %xmm0 242; SSE-NEXT: retq 243; 244; AVX-LABEL: signbit_mask_v16i8: 245; AVX: # %bb.0: 246; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 247; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 248; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 249; AVX-NEXT: retq 250; 251; AVX512-LABEL: signbit_mask_v16i8: 252; AVX512: # %bb.0: 253; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 254; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 255; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 256; AVX512-NEXT: retq 257 %cond = icmp slt <16 x i8> %a, zeroinitializer 258 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer 259 ret <16 x i8> %r 260} 261 262define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { 263; SSE-LABEL: signbit_mask_v8i16: 264; SSE: # %bb.0: 265; SSE-NEXT: psraw $15, %xmm0 266; SSE-NEXT: pand %xmm1, %xmm0 267; SSE-NEXT: retq 268; 269; AVX-LABEL: signbit_mask_v8i16: 270; AVX: # %bb.0: 271; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 272; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 273; AVX-NEXT: retq 274; 275; AVX512-LABEL: signbit_mask_v8i16: 276; AVX512: # %bb.0: 277; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 278; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 279; AVX512-NEXT: retq 280 %cond = icmp slt <8 x i16> %a, zeroinitializer 281 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer 282 ret <8 x i16> %r 283} 284 285define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { 286; SSE-LABEL: signbit_mask_v4i32: 287; SSE: # %bb.0: 288; SSE-NEXT: psrad $31, %xmm0 289; SSE-NEXT: pand %xmm1, %xmm0 290; SSE-NEXT: retq 291; 292; AVX-LABEL: signbit_mask_v4i32: 293; AVX: # %bb.0: 294; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 295; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 296; AVX-NEXT: retq 297; 298; AVX512-LABEL: signbit_mask_v4i32: 299; 
AVX512: # %bb.0: 300; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 301; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 302; AVX512-NEXT: retq 303 %cond = icmp slt <4 x i32> %a, zeroinitializer 304 %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer 305 ret <4 x i32> %r 306} 307 308define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { 309; SSE2-LABEL: signbit_mask_v2i64: 310; SSE2: # %bb.0: 311; SSE2-NEXT: psrad $31, %xmm0 312; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 313; SSE2-NEXT: pand %xmm1, %xmm0 314; SSE2-NEXT: retq 315; 316; SSE42-LABEL: signbit_mask_v2i64: 317; SSE42: # %bb.0: 318; SSE42-NEXT: pxor %xmm2, %xmm2 319; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 320; SSE42-NEXT: pand %xmm1, %xmm2 321; SSE42-NEXT: movdqa %xmm2, %xmm0 322; SSE42-NEXT: retq 323; 324; AVX-LABEL: signbit_mask_v2i64: 325; AVX: # %bb.0: 326; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 327; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 328; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 329; AVX-NEXT: retq 330; 331; AVX512-LABEL: signbit_mask_v2i64: 332; AVX512: # %bb.0: 333; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 334; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 335; AVX512-NEXT: retq 336 %cond = icmp slt <2 x i64> %a, zeroinitializer 337 %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer 338 ret <2 x i64> %r 339} 340 341; Swap cmp pred and select ops. This is logically equivalent to the above test. 
342 343define <2 x i64> @signbit_mask_swap_v2i64(<2 x i64> %a, <2 x i64> %b) { 344; SSE2-LABEL: signbit_mask_swap_v2i64: 345; SSE2: # %bb.0: 346; SSE2-NEXT: psrad $31, %xmm0 347; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 348; SSE2-NEXT: pand %xmm1, %xmm0 349; SSE2-NEXT: retq 350; 351; SSE42-LABEL: signbit_mask_swap_v2i64: 352; SSE42: # %bb.0: 353; SSE42-NEXT: pxor %xmm2, %xmm2 354; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 355; SSE42-NEXT: pand %xmm1, %xmm2 356; SSE42-NEXT: movdqa %xmm2, %xmm0 357; SSE42-NEXT: retq 358; 359; AVX-LABEL: signbit_mask_swap_v2i64: 360; AVX: # %bb.0: 361; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 362; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 363; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 364; AVX-NEXT: retq 365; 366; AVX512-LABEL: signbit_mask_swap_v2i64: 367; AVX512: # %bb.0: 368; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 369; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 370; AVX512-NEXT: retq 371 %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1> 372 %r = select <2 x i1> %cond, <2 x i64> zeroinitializer, <2 x i64> %b 373 ret <2 x i64> %r 374} 375 376define <32 x i8> @signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) { 377; SSE-LABEL: signbit_mask_v32i8: 378; SSE: # %bb.0: 379; SSE-NEXT: pxor %xmm4, %xmm4 380; SSE-NEXT: pxor %xmm5, %xmm5 381; SSE-NEXT: pcmpgtb %xmm0, %xmm5 382; SSE-NEXT: pand %xmm2, %xmm5 383; SSE-NEXT: pcmpgtb %xmm1, %xmm4 384; SSE-NEXT: pand %xmm3, %xmm4 385; SSE-NEXT: movdqa %xmm5, %xmm0 386; SSE-NEXT: movdqa %xmm4, %xmm1 387; SSE-NEXT: retq 388; 389; AVX1-LABEL: signbit_mask_v32i8: 390; AVX1: # %bb.0: 391; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 392; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 393; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 394; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 395; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 396; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 397; AVX1-NEXT: retq 398; 399; AVX2-LABEL: signbit_mask_v32i8: 400; AVX2: # %bb.0: 401; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 402; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 403; 
AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 404; AVX2-NEXT: retq 405; 406; AVX512-LABEL: signbit_mask_v32i8: 407; AVX512: # %bb.0: 408; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 409; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 410; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 411; AVX512-NEXT: retq 412 %cond = icmp slt <32 x i8> %a, zeroinitializer 413 %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer 414 ret <32 x i8> %r 415} 416 417define <16 x i16> @signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) { 418; SSE-LABEL: signbit_mask_v16i16: 419; SSE: # %bb.0: 420; SSE-NEXT: psraw $15, %xmm0 421; SSE-NEXT: pand %xmm2, %xmm0 422; SSE-NEXT: psraw $15, %xmm1 423; SSE-NEXT: pand %xmm3, %xmm1 424; SSE-NEXT: retq 425; 426; AVX1-LABEL: signbit_mask_v16i16: 427; AVX1: # %bb.0: 428; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 429; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 430; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 431; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 432; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 433; AVX1-NEXT: retq 434; 435; AVX2-LABEL: signbit_mask_v16i16: 436; AVX2: # %bb.0: 437; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 438; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 439; AVX2-NEXT: retq 440; 441; AVX512-LABEL: signbit_mask_v16i16: 442; AVX512: # %bb.0: 443; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 444; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 445; AVX512-NEXT: retq 446 %cond = icmp slt <16 x i16> %a, zeroinitializer 447 %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer 448 ret <16 x i16> %r 449} 450 451define <8 x i32> @signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) { 452; SSE-LABEL: signbit_mask_v8i32: 453; SSE: # %bb.0: 454; SSE-NEXT: psrad $31, %xmm0 455; SSE-NEXT: pand %xmm2, %xmm0 456; SSE-NEXT: psrad $31, %xmm1 457; SSE-NEXT: pand %xmm3, %xmm1 458; SSE-NEXT: retq 459; 460; AVX1-LABEL: signbit_mask_v8i32: 461; AVX1: # %bb.0: 462; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 463; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 464; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 465; 
AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 466; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 467; AVX1-NEXT: retq 468; 469; AVX2-LABEL: signbit_mask_v8i32: 470; AVX2: # %bb.0: 471; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 472; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 473; AVX2-NEXT: retq 474; 475; AVX512-LABEL: signbit_mask_v8i32: 476; AVX512: # %bb.0: 477; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 478; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 479; AVX512-NEXT: retq 480 %cond = icmp slt <8 x i32> %a, zeroinitializer 481 %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer 482 ret <8 x i32> %r 483} 484 485; Swap cmp pred and select ops. This is logically equivalent to the above test. 486 487define <8 x i32> @signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) { 488; SSE-LABEL: signbit_mask_swap_v8i32: 489; SSE: # %bb.0: 490; SSE-NEXT: psrad $31, %xmm0 491; SSE-NEXT: pand %xmm2, %xmm0 492; SSE-NEXT: psrad $31, %xmm1 493; SSE-NEXT: pand %xmm3, %xmm1 494; SSE-NEXT: retq 495; 496; AVX1-LABEL: signbit_mask_swap_v8i32: 497; AVX1: # %bb.0: 498; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 499; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 500; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 501; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 502; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 503; AVX1-NEXT: retq 504; 505; AVX2-LABEL: signbit_mask_swap_v8i32: 506; AVX2: # %bb.0: 507; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 508; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 509; AVX2-NEXT: retq 510; 511; AVX512-LABEL: signbit_mask_swap_v8i32: 512; AVX512: # %bb.0: 513; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 514; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 515; AVX512-NEXT: retq 516 %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 517 %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b 518 ret <8 x i32> %r 519} 520 521define <4 x i64> @signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) { 522; SSE2-LABEL: signbit_mask_v4i64: 523; SSE2: # %bb.0: 524; SSE2-NEXT: psrad $31, 
%xmm0 525; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 526; SSE2-NEXT: pand %xmm2, %xmm0 527; SSE2-NEXT: psrad $31, %xmm1 528; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 529; SSE2-NEXT: pand %xmm3, %xmm1 530; SSE2-NEXT: retq 531; 532; SSE42-LABEL: signbit_mask_v4i64: 533; SSE42: # %bb.0: 534; SSE42-NEXT: pxor %xmm4, %xmm4 535; SSE42-NEXT: pxor %xmm5, %xmm5 536; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 537; SSE42-NEXT: pand %xmm2, %xmm5 538; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 539; SSE42-NEXT: pand %xmm3, %xmm4 540; SSE42-NEXT: movdqa %xmm5, %xmm0 541; SSE42-NEXT: movdqa %xmm4, %xmm1 542; SSE42-NEXT: retq 543; 544; AVX1-LABEL: signbit_mask_v4i64: 545; AVX1: # %bb.0: 546; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 547; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 548; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 549; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 550; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 551; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 552; AVX1-NEXT: retq 553; 554; AVX2-LABEL: signbit_mask_v4i64: 555; AVX2: # %bb.0: 556; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 557; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 558; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 559; AVX2-NEXT: retq 560; 561; AVX512-LABEL: signbit_mask_v4i64: 562; AVX512: # %bb.0: 563; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 564; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 565; AVX512-NEXT: retq 566 %cond = icmp slt <4 x i64> %a, zeroinitializer 567 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer 568 ret <4 x i64> %r 569} 570 571define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { 572; SSE-LABEL: signbit_setmask_v16i8: 573; SSE: # %bb.0: 574; SSE-NEXT: pxor %xmm2, %xmm2 575; SSE-NEXT: pcmpgtb %xmm0, %xmm2 576; SSE-NEXT: por %xmm1, %xmm2 577; SSE-NEXT: movdqa %xmm2, %xmm0 578; SSE-NEXT: retq 579; 580; AVX-LABEL: signbit_setmask_v16i8: 581; AVX: # %bb.0: 582; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 583; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 584; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 585; AVX-NEXT: 
retq 586; 587; AVX512-LABEL: signbit_setmask_v16i8: 588; AVX512: # %bb.0: 589; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 590; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 591; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 592; AVX512-NEXT: retq 593 %cond = icmp slt <16 x i8> %a, zeroinitializer 594 %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b 595 ret <16 x i8> %r 596} 597 598; Swap cmp pred and select ops. This is logically equivalent to the above test. 599 600define <16 x i8> @signbit_setmask_swap_v16i8(<16 x i8> %a, <16 x i8> %b) { 601; SSE-LABEL: signbit_setmask_swap_v16i8: 602; SSE: # %bb.0: 603; SSE-NEXT: pxor %xmm2, %xmm2 604; SSE-NEXT: pcmpgtb %xmm0, %xmm2 605; SSE-NEXT: por %xmm1, %xmm2 606; SSE-NEXT: movdqa %xmm2, %xmm0 607; SSE-NEXT: retq 608; 609; AVX-LABEL: signbit_setmask_swap_v16i8: 610; AVX: # %bb.0: 611; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 612; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 613; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 614; AVX-NEXT: retq 615; 616; AVX512-LABEL: signbit_setmask_swap_v16i8: 617; AVX512: # %bb.0: 618; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 619; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 620; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 621; AVX512-NEXT: retq 622 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 623 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 624 ret <16 x i8> %r 625} 626 627define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { 628; SSE-LABEL: signbit_setmask_v8i16: 629; SSE: # %bb.0: 630; SSE-NEXT: psraw $15, %xmm0 631; SSE-NEXT: por %xmm1, %xmm0 632; SSE-NEXT: retq 633; 634; AVX-LABEL: signbit_setmask_v8i16: 635; AVX: # %bb.0: 636; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 637; AVX-NEXT: vpor 
%xmm1, %xmm0, %xmm0 638; AVX-NEXT: retq 639; 640; AVX512-LABEL: signbit_setmask_v8i16: 641; AVX512: # %bb.0: 642; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 643; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 644; AVX512-NEXT: retq 645 %cond = icmp slt <8 x i16> %a, zeroinitializer 646 %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b 647 ret <8 x i16> %r 648} 649 650define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { 651; SSE-LABEL: signbit_setmask_v4i32: 652; SSE: # %bb.0: 653; SSE-NEXT: psrad $31, %xmm0 654; SSE-NEXT: por %xmm1, %xmm0 655; SSE-NEXT: retq 656; 657; AVX-LABEL: signbit_setmask_v4i32: 658; AVX: # %bb.0: 659; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 660; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 661; AVX-NEXT: retq 662; 663; AVX512-LABEL: signbit_setmask_v4i32: 664; AVX512: # %bb.0: 665; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 666; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 667; AVX512-NEXT: retq 668 %cond = icmp slt <4 x i32> %a, zeroinitializer 669 %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b 670 ret <4 x i32> %r 671} 672 673define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) { 674; SSE2-LABEL: signbit_setmask_v2i64: 675; SSE2: # %bb.0: 676; SSE2-NEXT: psrad $31, %xmm0 677; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 678; SSE2-NEXT: por %xmm1, %xmm0 679; SSE2-NEXT: retq 680; 681; SSE42-LABEL: signbit_setmask_v2i64: 682; SSE42: # %bb.0: 683; SSE42-NEXT: pxor %xmm2, %xmm2 684; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 685; SSE42-NEXT: por %xmm1, %xmm2 686; SSE42-NEXT: movdqa %xmm2, %xmm0 687; SSE42-NEXT: retq 688; 689; AVX-LABEL: signbit_setmask_v2i64: 690; AVX: # %bb.0: 691; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 692; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 693; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 694; AVX-NEXT: retq 695; 696; AVX512-LABEL: signbit_setmask_v2i64: 697; AVX512: # %bb.0: 698; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 699; AVX512-NEXT: vpor 
%xmm1, %xmm0, %xmm0 700; AVX512-NEXT: retq 701 %cond = icmp slt <2 x i64> %a, zeroinitializer 702 %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b 703 ret <2 x i64> %r 704} 705 706define <32 x i8> @signbit_setmask_v32i8(<32 x i8> %a, <32 x i8> %b) { 707; SSE-LABEL: signbit_setmask_v32i8: 708; SSE: # %bb.0: 709; SSE-NEXT: pxor %xmm4, %xmm4 710; SSE-NEXT: pxor %xmm5, %xmm5 711; SSE-NEXT: pcmpgtb %xmm0, %xmm5 712; SSE-NEXT: por %xmm2, %xmm5 713; SSE-NEXT: pcmpgtb %xmm1, %xmm4 714; SSE-NEXT: por %xmm3, %xmm4 715; SSE-NEXT: movdqa %xmm5, %xmm0 716; SSE-NEXT: movdqa %xmm4, %xmm1 717; SSE-NEXT: retq 718; 719; AVX1-LABEL: signbit_setmask_v32i8: 720; AVX1: # %bb.0: 721; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 722; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 723; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 724; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 725; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 726; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 727; AVX1-NEXT: retq 728; 729; AVX2-LABEL: signbit_setmask_v32i8: 730; AVX2: # %bb.0: 731; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 732; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 733; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 734; AVX2-NEXT: retq 735; 736; AVX512-LABEL: signbit_setmask_v32i8: 737; AVX512: # %bb.0: 738; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 739; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 740; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 741; AVX512-NEXT: retq 742 %cond = icmp slt <32 x i8> %a, zeroinitializer 743 %r = select <32 x i1> %cond, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %b 744 ret <32 x i8> %r 745} 746 747define <16 x i16> @signbit_setmask_v16i16(<16 x i16> %a, <16 x i16> %b) { 748; SSE-LABEL: signbit_setmask_v16i16: 749; SSE: # %bb.0: 750; SSE-NEXT: psraw $15, %xmm0 751; SSE-NEXT: por %xmm2, %xmm0 752; SSE-NEXT: 
psraw $15, %xmm1 753; SSE-NEXT: por %xmm3, %xmm1 754; SSE-NEXT: retq 755; 756; AVX1-LABEL: signbit_setmask_v16i16: 757; AVX1: # %bb.0: 758; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 759; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 760; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 761; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 762; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 763; AVX1-NEXT: retq 764; 765; AVX2-LABEL: signbit_setmask_v16i16: 766; AVX2: # %bb.0: 767; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 768; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 769; AVX2-NEXT: retq 770; 771; AVX512-LABEL: signbit_setmask_v16i16: 772; AVX512: # %bb.0: 773; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 774; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 775; AVX512-NEXT: retq 776 %cond = icmp slt <16 x i16> %a, zeroinitializer 777 %r = select <16 x i1> %cond, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %b 778 ret <16 x i16> %r 779} 780 781define <8 x i32> @signbit_setmask_v8i32(<8 x i32> %a, <8 x i32> %b) { 782; SSE-LABEL: signbit_setmask_v8i32: 783; SSE: # %bb.0: 784; SSE-NEXT: psrad $31, %xmm0 785; SSE-NEXT: por %xmm2, %xmm0 786; SSE-NEXT: psrad $31, %xmm1 787; SSE-NEXT: por %xmm3, %xmm1 788; SSE-NEXT: retq 789; 790; AVX1-LABEL: signbit_setmask_v8i32: 791; AVX1: # %bb.0: 792; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 793; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 794; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 795; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 796; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 797; AVX1-NEXT: retq 798; 799; AVX2-LABEL: signbit_setmask_v8i32: 800; AVX2: # %bb.0: 801; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 802; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 803; AVX2-NEXT: retq 804; 805; AVX512-LABEL: signbit_setmask_v8i32: 806; AVX512: # %bb.0: 807; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 808; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 809; AVX512-NEXT: retq 810 %cond = icmp slt <8 x i32> %a, zeroinitializer 811 %r = select <8 
x i1> %cond, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> %b 812 ret <8 x i32> %r 813} 814 815define <4 x i64> @signbit_setmask_v4i64(<4 x i64> %a, <4 x i64> %b) { 816; SSE2-LABEL: signbit_setmask_v4i64: 817; SSE2: # %bb.0: 818; SSE2-NEXT: psrad $31, %xmm0 819; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 820; SSE2-NEXT: por %xmm2, %xmm0 821; SSE2-NEXT: psrad $31, %xmm1 822; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 823; SSE2-NEXT: por %xmm3, %xmm1 824; SSE2-NEXT: retq 825; 826; SSE42-LABEL: signbit_setmask_v4i64: 827; SSE42: # %bb.0: 828; SSE42-NEXT: pxor %xmm4, %xmm4 829; SSE42-NEXT: pxor %xmm5, %xmm5 830; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 831; SSE42-NEXT: por %xmm2, %xmm5 832; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 833; SSE42-NEXT: por %xmm3, %xmm4 834; SSE42-NEXT: movdqa %xmm5, %xmm0 835; SSE42-NEXT: movdqa %xmm4, %xmm1 836; SSE42-NEXT: retq 837; 838; AVX1-LABEL: signbit_setmask_v4i64: 839; AVX1: # %bb.0: 840; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 841; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 842; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 843; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 844; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 845; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 846; AVX1-NEXT: retq 847; 848; AVX2-LABEL: signbit_setmask_v4i64: 849; AVX2: # %bb.0: 850; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 851; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 852; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 853; AVX2-NEXT: retq 854; 855; AVX512-LABEL: signbit_setmask_v4i64: 856; AVX512: # %bb.0: 857; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 858; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 859; AVX512-NEXT: retq 860 %cond = icmp slt <4 x i64> %a, zeroinitializer 861 %r = select <4 x i1> %cond, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %b 862 ret <4 x i64> %r 863} 864 865; Swap cmp pred and select ops. This is logically equivalent to the above test. 
866 867define <4 x i64> @signbit_setmask_swap_v4i64(<4 x i64> %a, <4 x i64> %b) { 868; SSE2-LABEL: signbit_setmask_swap_v4i64: 869; SSE2: # %bb.0: 870; SSE2-NEXT: psrad $31, %xmm0 871; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 872; SSE2-NEXT: por %xmm2, %xmm0 873; SSE2-NEXT: psrad $31, %xmm1 874; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 875; SSE2-NEXT: por %xmm3, %xmm1 876; SSE2-NEXT: retq 877; 878; SSE42-LABEL: signbit_setmask_swap_v4i64: 879; SSE42: # %bb.0: 880; SSE42-NEXT: pxor %xmm4, %xmm4 881; SSE42-NEXT: pxor %xmm5, %xmm5 882; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 883; SSE42-NEXT: por %xmm2, %xmm5 884; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 885; SSE42-NEXT: por %xmm3, %xmm4 886; SSE42-NEXT: movdqa %xmm5, %xmm0 887; SSE42-NEXT: movdqa %xmm4, %xmm1 888; SSE42-NEXT: retq 889; 890; AVX1-LABEL: signbit_setmask_swap_v4i64: 891; AVX1: # %bb.0: 892; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 893; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 894; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 895; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 896; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 897; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 898; AVX1-NEXT: retq 899; 900; AVX2-LABEL: signbit_setmask_swap_v4i64: 901; AVX2: # %bb.0: 902; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 903; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0 904; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 905; AVX2-NEXT: retq 906; 907; AVX512-LABEL: signbit_setmask_swap_v4i64: 908; AVX512: # %bb.0: 909; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0 910; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 911; AVX512-NEXT: retq 912 %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1> 913 %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> 914 ret <4 x i64> %r 915} 916 917define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { 918; SSE-LABEL: not_signbit_mask_v16i8: 919; SSE: # %bb.0: 920; SSE-NEXT: pcmpeqd %xmm2, %xmm2 921; SSE-NEXT: pcmpgtb %xmm2, %xmm0 922; SSE-NEXT: pand %xmm1, %xmm0 923; SSE-NEXT: retq 
924; 925; AVX-LABEL: not_signbit_mask_v16i8: 926; AVX: # %bb.0: 927; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 928; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 929; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 930; AVX-NEXT: retq 931; 932; AVX512-LABEL: not_signbit_mask_v16i8: 933; AVX512: # %bb.0: 934; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 935; AVX512-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0 936; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 937; AVX512-NEXT: retq 938 %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 939 %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer 940 ret <16 x i8> %r 941} 942 943define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { 944; SSE-LABEL: not_signbit_mask_v8i16: 945; SSE: # %bb.0: 946; SSE-NEXT: psraw $15, %xmm0 947; SSE-NEXT: pandn %xmm1, %xmm0 948; SSE-NEXT: retq 949; 950; AVX-LABEL: not_signbit_mask_v8i16: 951; AVX: # %bb.0: 952; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 953; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 954; AVX-NEXT: retq 955; 956; AVX512-LABEL: not_signbit_mask_v8i16: 957; AVX512: # %bb.0: 958; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 959; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0 960; AVX512-NEXT: retq 961 %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 962 %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer 963 ret <8 x i16> %r 964} 965 966; Swap cmp pred and select ops. This is logically equivalent to the above test. 
; (%a < 0) ? 0 : %b is the swapped form of not_signbit_mask_v8i16 and should
; produce identical psraw+pandn code.
define <8 x i16> @not_signbit_mask_swap_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: not_signbit_mask_swap_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_swap_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_swap_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %cond = icmp slt <8 x i16> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i16> zeroinitializer, <8 x i16> %b
  ret <8 x i16> %r
}

; i32 element version: ANDN(sign-splat(%a), %b) via psrad $31.
define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: not_signbit_mask_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; i64 element version: no 64-bit arithmetic shift before AVX512 (vpsraq), so
; pre-AVX512 targets use a compare against -1 (SSE2 shuffles the high halves
; to splat the sign via 32-bit pcmpgtd).
define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: not_signbit_mask_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: not_signbit_mask_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pand %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
  %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
  ret <2 x i64> %r
}

; 256-bit i8 version; AVX1 has no 256-bit integer compare, so it splits into
; 128-bit halves and recombines with vinsertf128.
define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: not_signbit_mask_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm4, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pcmpgtb %xmm4, %xmm1
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cond = icmp sgt <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
  ret <32 x i8> %r
}

; 256-bit i16 version: psraw+pandn, split into halves on AVX1.
define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: not_signbit_mask_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cond = icmp sgt <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
  ret <16 x i16> %r
}

; 256-bit i32 version: psrad+pandn, split into halves on AVX1.
define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: not_signbit_mask_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; Swap cmp pred and select ops. This is logically equivalent to the above test.

define <8 x i32> @not_signbit_mask_swap_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: not_signbit_mask_swap_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_swap_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_swap_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_swap_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cond = icmp slt <8 x i32> %a, zeroinitializer
  %r = select <8 x i1> %cond, <8 x i32> zeroinitializer, <8 x i32> %b
  ret <8 x i32> %r
}

; 256-bit i64 version: compare-based before AVX512, vpsraq $63 + vpandn after.
define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: not_signbit_mask_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: not_signbit_mask_v4i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; SSE42-NEXT: pand %xmm2, %xmm0
; SSE42-NEXT: pcmpgtq %xmm4, %xmm1
; SSE42-NEXT: pand %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
  ret <4 x i64> %r
}