; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512DQBW

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}
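
; The four tests above share one idiom: splat the sign bit with an arithmetic
; shift, then invert it. That is "x is not negative", i.e. "x > -1", so it can
; lower to a single pcmpgt against an all-ones register (materialized by
; pcmpeqd with no load). An illustrative sketch of the equivalence, not a
; FileCheck directive:
;   %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
;   %not  = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
; produces the same value as
;   %cmp  = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
;   %not  = sext <4 x i1> %cmp to <4 x i32>
; SSE2 has no pcmpgtq, so the v2i64 case above compares just the odd
; (sign-carrying) dwords after a pshufd.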

define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: sarq $63, %rax
; CHECK-NEXT: notq %rax
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pcmpgtb %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_pcmpgtb_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}
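
; AVX1 note for the 256-bit tests: integer compares only exist at 128-bit
; width there, so the pattern above splits into two xmm pcmpgt ops, and the
; all-ones ymm for the final xor is built with vcmptrueps. This comment is an
; observation about the expected code, not an extra check.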

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: pcmpgtw %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_pcmpgtw_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_pcmpgtd_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pcmpgtq %xmm2, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_pcmpgtq_256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqb %xmm1, %xmm0
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}
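
; For the cmpeq/cmpgt_zext tests that follow, the compare already yields
; all-ones lanes, so the zext from <N x i1> only has to keep the low bit: a
; logical shift right by (lane width - 1) for 16/32/64-bit lanes, or, for byte
; lanes (x86 has no per-byte shift), a pand with a splat-of-1 constant from
; the constant pool, as checked above.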

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: pcmpeqw %xmm3, %xmm1
; SSE-NEXT: psrlw $15, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpeq_zext_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pcmpeqd %xmm3, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT: pand %xmm4, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqq %xmm2, %xmm0
; SSE42-NEXT: psrlq $63, %xmm0
; SSE42-NEXT: pcmpeqq %xmm3, %xmm1
; SSE42-NEXT: psrlq $63, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpeq_zext_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpgt_zext_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: psrlw $15, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: psrld $31, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpgt_zext_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT: pxor %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; SSE42-NEXT: psrlq $63, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}

; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
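; Sketch of the pattern (illustrative, assuming the shift amount leaves only
; the low bit, as in the functions below):
;   %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
;   %b = icmp ne <8 x i16> %a, zeroinitializer  ; equal to that lsb
;   %c = zext <8 x i1> %b to <8 x i32>
; so the compare is redundant and only the shift plus the width change
; (punpck/pmovzx) should survive.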

define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $15, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42: # %bb.0:
; SSE42-NEXT: movdqa %xmm0, %xmm1
; SSE42-NEXT: psrlw $15, %xmm1
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE42-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE42-NEXT: retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: psrld $31, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}
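
; When the zext destination is narrower than the compared type (next
; function), the shifted halves also have to be packed: packuswb/packusdw
; saturate, which is safe here because psrld $31 leaves only 0 or 1 per lane;
; the AVX512 run can use a plain vpmovdw truncate instead.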

define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: psrld $31, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42: # %bb.0:
; SSE42-NEXT: psrld $31, %xmm1
; SSE42-NEXT: psrld $31, %xmm0
; SSE42-NEXT: packusdw %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %ymm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}

; PR26697
define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
; SSE-LABEL: cmpeq_one_mask_bit:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: cmpeq_one_mask_bit:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: retq
  %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
  %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
  ret <4 x i32> %mask_bool_ext
}
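
; The PR26697 fold above in IR terms (an illustrative restatement, not a
; directive): testing only the sign bit and sign-extending the result,
;   %m = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
;   %b = icmp ne <4 x i32> %m, zeroinitializer
;   %e = sext <4 x i1> %b to <4 x i32>
; is just an arithmetic shift:
;   %e = ashr <4 x i32> %mask, <i32 31, i32 31, i32 31, i32 31>
; hence the single psrad $31.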

define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: not_signbit_mask_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: not_signbit_mask_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pand %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %sh = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sh, <i64 -1, i64 -1>
  %and = and <2 x i64> %y, %not
  ret <2 x i64> %and
}

define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: not_signbit_mask_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <4 x i32> %not, %y
  ret <4 x i32> %and
}

define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: not_signbit_mask_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sh = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <8 x i16> %y, %not
  ret <8 x i16> %and
}

define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: not_signbit_mask_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_signbit_mask_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %sh = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <16 x i8> %not, %y
  ret <16 x i8> %and
}
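
; In the not_signbit_mask tests the xor with -1 should never become a
; separate instruction: pandn computes (~a) & b, so psra + pandn covers the
; lane sizes that have an arithmetic shift (i16/i32), while i8 (no per-byte
; arithmetic shift) and i64 before AVX512 fall back to pcmpgt against
; all-ones plus a plain pand.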

define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: not_signbit_mask_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: not_signbit_mask_v4i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
; SSE42-NEXT: pcmpgtq %xmm4, %xmm1
; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; SSE42-NEXT: pand %xmm2, %xmm0
; SSE42-NEXT: pand %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sh = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sh, <i64 -1, i64 -1, i64 -1, i64 -1>
  %and = and <4 x i64> %y, %not
  ret <4 x i64> %and
}

define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; SSE-LABEL: not_signbit_mask_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sh = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <8 x i32> %not, %y
  ret <8 x i32> %and
}

define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
; SSE-LABEL: not_signbit_mask_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sh = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <16 x i16> %y, %not
  ret <16 x i16> %and
}
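
; AVX512VL adds 64-bit arithmetic shifts, which is why the AVX512 runs for
; the v2i64/v4i64 variants above use vpsraq $63 + vpandn instead of emulating
; a 64-bit compare.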

define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
; SSE-LABEL: not_signbit_mask_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm4, %xmm1
; SSE-NEXT: pcmpgtb %xmm4, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: not_signbit_mask_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: not_signbit_mask_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: not_signbit_mask_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %sh = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <32 x i8> %not, %y
  ret <32 x i8> %and
}
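
; The ispositive/is_positive tests below express the same masks as
; sext(icmp sgt %x, -1) & %y instead of ~(ashr %x, K-1) & %y; both canonical
; forms are expected to reach the same instruction sequences checked above.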

define <2 x i64> @ispositive_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ispositive_mask_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: ispositive_mask_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pand %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: ispositive_mask_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ispositive_mask_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ispositive_mask_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  %and = and <2 x i64> %mask, %y
  ret <2 x i64> %and
}

define <4 x i32> @is_positive_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: is_positive_mask_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  %and = and <4 x i32> %y, %mask
  ret <4 x i32> %and
}

define <8 x i16> @is_positive_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: is_positive_mask_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <8 x i1> %cmp to <8 x i16>
  %and = and <8 x i16> %mask, %y
  ret <8 x i16> %and
}

define <16 x i8> @is_positive_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: is_positive_mask_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <16 x i1> %cmp to <16 x i8>
  %and = and <16 x i8> %y, %mask
  ret <16 x i8> %and
}

define <4 x i64> @is_positive_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: is_positive_mask_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: is_positive_mask_v4i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
; SSE42-NEXT: pcmpgtq %xmm4, %xmm1
; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; SSE42-NEXT: pand %xmm2, %xmm0
; SSE42-NEXT: pand %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %mask = sext <4 x i1> %cmp to <4 x i64>
  %and = and <4 x i64> %mask, %y
  ret <4 x i64> %and
}

define <8 x i32> @is_positive_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; SSE-LABEL: is_positive_mask_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <8 x i1> %cmp to <8 x i32>
  %and = and <8 x i32> %y, %mask
  ret <8 x i32> %and
}

define <16 x i16> @is_positive_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
; SSE-LABEL: is_positive_mask_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm2, %xmm0
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: pandn %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT: vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <16 x i1> %cmp to <16 x i16>
  %and = and <16 x i16> %mask, %y
  ret <16 x i16> %and
}

define <32 x i8> @is_positive_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
; SSE-LABEL: is_positive_mask_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm4, %xmm4
; SSE-NEXT: pcmpgtb %xmm4, %xmm1
; SSE-NEXT: pcmpgtb %xmm4, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <32 x i1> %cmp to <32 x i8>
  %and = and <32 x i8> %y, %mask
  ret <32 x i8> %and
}

define <2 x i64> @ispositive_mask_load_v2i64(<2 x i64> %x, ptr %p) {
; SSE2-LABEL: ispositive_mask_load_v2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pand (%rdi), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: ispositive_mask_load_v2i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; SSE42-NEXT: pand (%rdi), %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: ispositive_mask_load_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ispositive_mask_load_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ispositive_mask_load_v2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT: vpandn (%rdi), %xmm0, %xmm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  %y = load <2 x i64>, ptr %p
  %and = and <2 x i64> %mask, %y
  ret <2 x i64> %and
}
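
; With the mask operand loaded from memory, pand/pandn fold the load directly
; as a (%rdi) memory operand, so no separate vector load survives. pandn
; complements only one of its operands (the mask here); the other may come
; from memory.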

define <4 x i32> @is_positive_mask_load_v4i32(<4 x i32> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v4i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_load_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpandn (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  %y = load <4 x i32>, ptr %p
  %and = and <4 x i32> %y, %mask
  ret <4 x i32> %and
}

define <8 x i16> @is_positive_mask_load_v8i16(<8 x i16> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v8i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_load_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX-NEXT: vpandn (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <8 x i1> %cmp to <8 x i16>
  %y = load <8 x i16>, ptr %p
  %and = and <8 x i16> %mask, %y
  ret <8 x i16> %and
}

define <16 x i8> @is_positive_mask_load_v16i8(<16 x i8> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v16i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pand (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: is_positive_mask_load_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <16 x i1> %cmp to <16 x i8>
  %y = load <16 x i8>, ptr %p
  %and = and <16 x i8> %y, %mask
  ret <16 x i8> %and
}

define <4 x i64> @is_positive_mask_load_v4i64(<4 x i64> %x, ptr %p) {
; SSE2-LABEL: is_positive_mask_load_v4i64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pand (%rdi), %xmm0
; SSE2-NEXT: pand 16(%rdi), %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: is_positive_mask_load_v4i64:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm1
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pand (%rdi), %xmm0
; SSE42-NEXT: pand 16(%rdi), %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_load_v4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_load_v4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_load_v4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %mask = sext <4 x i1> %cmp to <4 x i64>
  %y = load <4 x i64>, ptr %p
  %and = and <4 x i64> %mask, %y
  ret <4 x i64> %and
}

define <8 x i32> @is_positive_mask_load_v8i32(<8 x i32> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v8i32:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn (%rdi), %xmm0
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pandn 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_load_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_load_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_load_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <8 x i1> %cmp to <8 x i32>
  %y = load <8 x i32>, ptr %p
  %and = and <8 x i32> %y, %mask
  ret <8 x i32> %and
}

define <16 x i16> @is_positive_mask_load_v16i16(<16 x i16> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v16i16:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn (%rdi), %xmm0
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: pandn 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_load_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_load_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpandn (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_load_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT: vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <16 x i1> %cmp to <16 x i16>
  %y = load <16 x i16>, ptr %p
  %and = and <16 x i16> %mask, %y
  ret <16 x i16> %and
}

define <32 x i8> @is_positive_mask_load_v32i8(<32 x i8> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v32i8:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm2, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand (%rdi), %xmm0
; SSE-NEXT: pand 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_load_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_load_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: is_positive_mask_load_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpand (%rdi), %ymm0, %ymm0
; AVX512-NEXT: retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <32 x i1> %cmp to <32 x i8>
  %y = load <32 x i8>, ptr %p
  %and = and <32 x i8> %y, %mask
  ret <32 x i8> %and
}

define <2 x i1> @ispositive_mask_v2i64_v2i1(<2 x i64> %x, <2 x i1> %y) {
; SSE2-LABEL: ispositive_mask_v2i64_v2i1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: ispositive_mask_v2i64_v2i1:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; SSE42-NEXT: pand %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: ispositive_mask_v2i64_v2i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ispositive_mask_v2i64_v2i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ispositive_mask_v2i64_v2i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllq $63, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm0, %k1
; AVX512F-NEXT: vptestmq %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT: vmovdqa64 %xmm2, %xmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: ispositive_mask_v2i64_v2i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpsllq $63, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQBW-NEXT: vpcmpgtq %xmm3, %xmm0, %k1
; AVX512DQBW-NEXT: vpcmpgtq %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2q %k0, %xmm0
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
  %and = and <2 x i1> %cmp, %y
  ret <2 x i1> %and
}
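
; For <N x i1> results the AVX512 runs keep the compare in a k mask register:
; the incoming i1 vector (passed in an xmm) is renormalized by shifting its
; low bit into the sign position (vpsllq $63, vpslld $31, vpsllw $15/$7) and
; testing that, the AND happens between mask registers, and the result is
; widened back with vpmovm2* (DQ/BW) or a zero-masked move on plain AVX512F.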

define <4 x i1> @is_positive_mask_v4i32_v4i1(<4 x i32> %x, <4 x i1> %y) {
; SSE-LABEL: is_positive_mask_v4i32_v4i1:
; SSE: # %bb.0:
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v4i32_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v4i32_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v4i32_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm0, %k1
; AVX512F-NEXT: vptestmd %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v4i32_v4i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpmovd2m %xmm1, %k1
; AVX512DQBW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <4 x i1> %y, %cmp
  ret <4 x i1> %and
}

define <8 x i1> @is_positive_mask_v8i16_v8i1(<8 x i16> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i16_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: psraw $15, %xmm0
; SSE-NEXT: pandn %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v8i16_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v8i16_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQBW-NEXT: vpcmpgtw %xmm3, %xmm0, %k1
; AVX512DQBW-NEXT: vpcmpgtw %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <8 x i1> %cmp, %y
  ret <8 x i1> %and
}

define <16 x i1> @is_positive_mask_v16i8_v16i1(<16 x i8> %x, <16 x i1> %y) {
; SSE-LABEL: is_positive_mask_v16i8_v16i1:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v16i8_v16i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v16i8_v16i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpsllw $7, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpmovb2m %xmm1, %k1
; AVX512DQBW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2b %k0, %xmm0
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <16 x i1> %y, %cmp
  ret <16 x i1> %and
}

define <4 x i1> @is_positive_mask_v4i64_v4i1(<4 x i64> %x, <4 x i1> %y) {
; SSE2-LABEL: is_positive_mask_v4i64_v4i1:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: is_positive_mask_v4i64_v4i1:
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqd %xmm3, %xmm3
; SSE42-NEXT: pcmpgtq %xmm3, %xmm1
; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; SSE42-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE42-NEXT: andps %xmm2, %xmm0
; SSE42-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v4i64_v4i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v4i64_v4i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm0, %k1
; AVX512F-NEXT: vptestmd %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQBW-NEXT: vpcmpgtq %ymm3, %ymm0, %k1
; AVX512DQBW-NEXT: vpcmpgtd %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2d %k0, %xmm0
; AVX512DQBW-NEXT: vzeroupper
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %and = and <4 x i1> %cmp, %y
  ret <4 x i1> %and
}

define <8 x i1> @is_positive_mask_v8i32_v8i1(<8 x i32> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i32_v8i1:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm3, %xmm3
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm3, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v8i32_v8i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v8i32_v8i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX512F-NEXT: vpslld $31, %ymm1, %ymm1
; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %k1
; AVX512F-NEXT: vptestmd %ymm1, %ymm1, %k1 {%k1}
; AVX512F-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %ymm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT: vpmovw2m %xmm1, %k1
; AVX512DQBW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT: vzeroupper
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <8 x i1> %y, %cmp
  ret <8 x i1> %and
}
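
; Without mask registers, the wide compare result has to be narrowed to the
; <N x i1> return type: packssdw/packsswb keep all-ones/all-zeros lanes
; intact while halving the lane size, and vzeroupper is emitted before
; returning in an xmm once the ymm upper half is dead.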
vzeroupper 1672; AVX2-NEXT: retq 1673; 1674; AVX512F-LABEL: is_positive_mask_v8i32_v8i1: 1675; AVX512F: # %bb.0: 1676; AVX512F-NEXT: vpmovsxwd %xmm1, %ymm1 1677; AVX512F-NEXT: vpslld $31, %ymm1, %ymm1 1678; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1679; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm0, %k1 1680; AVX512F-NEXT: vptestmd %ymm1, %ymm1, %k1 {%k1} 1681; AVX512F-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z} 1682; AVX512F-NEXT: vpmovdw %ymm0, %xmm0 1683; AVX512F-NEXT: vzeroupper 1684; AVX512F-NEXT: retq 1685; 1686; AVX512DQBW-LABEL: is_positive_mask_v8i32_v8i1: 1687; AVX512DQBW: # %bb.0: 1688; AVX512DQBW-NEXT: vpsllw $15, %xmm1, %xmm1 1689; AVX512DQBW-NEXT: vpmovw2m %xmm1, %k1 1690; AVX512DQBW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1691; AVX512DQBW-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 1692; AVX512DQBW-NEXT: vpmovm2w %k0, %xmm0 1693; AVX512DQBW-NEXT: vzeroupper 1694; AVX512DQBW-NEXT: retq 1695 %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 1696 %and = and <8 x i1> %y, %cmp 1697 ret <8 x i1> %and 1698} 1699 1700define <16 x i1> @is_positive_mask_v16i16_v16i1(<16 x i16> %x, <16 x i1> %y) { 1701; SSE-LABEL: is_positive_mask_v16i16_v16i1: 1702; SSE: # %bb.0: 1703; SSE-NEXT: pcmpeqd %xmm3, %xmm3 1704; SSE-NEXT: pcmpgtw %xmm3, %xmm1 1705; SSE-NEXT: pcmpgtw %xmm3, %xmm0 1706; SSE-NEXT: packsswb %xmm1, %xmm0 1707; SSE-NEXT: pand %xmm2, %xmm0 1708; SSE-NEXT: retq 1709; 1710; AVX1-LABEL: is_positive_mask_v16i16_v16i1: 1711; AVX1: # %bb.0: 1712; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1713; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1714; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2 1715; AVX1-NEXT: vpcmpgtw %xmm3, %xmm0, %xmm0 1716; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1717; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 1718; AVX1-NEXT: vzeroupper 1719; AVX1-NEXT: retq 1720; 1721; AVX2-LABEL: is_positive_mask_v16i16_v16i1: 1722; AVX2: # %bb.0: 1723; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 1724; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 1725; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 1726; AVX2-NEXT: vpacksswb %xmm2, %xmm0, %xmm0 1727; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 1728; AVX2-NEXT: vzeroupper 1729; AVX2-NEXT: retq 1730; 1731; AVX512F-LABEL: is_positive_mask_v16i16_v16i1: 1732; AVX512F: # %bb.0: 1733; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 1734; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 1735; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 1736; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 1737; AVX512F-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 1738; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 1739; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} 1740; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1741; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 1742; AVX512F-NEXT: vzeroupper 1743; AVX512F-NEXT: retq 1744; 1745; AVX512DQBW-LABEL: is_positive_mask_v16i16_v16i1: 1746; AVX512DQBW: # %bb.0: 1747; AVX512DQBW-NEXT: vpsllw $7, %xmm1, %xmm1 1748; AVX512DQBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 1749; AVX512DQBW-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 1750; AVX512DQBW-NEXT: vpcmpgtw %ymm3, %ymm0, %k1 1751; AVX512DQBW-NEXT: vpcmpgtb %xmm1, %xmm2, %k0 {%k1} 1752; AVX512DQBW-NEXT: vpmovm2b %k0, %xmm0 1753; AVX512DQBW-NEXT: vzeroupper 1754; AVX512DQBW-NEXT: retq 1755 %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1756 %and = and <16 x i1> %cmp, %y 1757 ret <16 x i1> %and 1758} 1759 1760define <32 x i1> @is_positive_mask_v32i8_v32i1(<32 x i8> %x, <32 x i1> %y) { 1761; SSE2-LABEL: is_positive_mask_v32i8_v32i1: 
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movd %r9d, %xmm4
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT: movd %r8d, %xmm2
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movd %edx, %xmm6
; SSE2-NEXT: movd %esi, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm6[0]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: pcmpgtb %xmm3, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pcmpgtb %xmm3, %xmm1
; SSE2-NEXT: pand %xmm7, %xmm1
; SSE2-NEXT: psllw $7, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %ecx
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %edx
; SSE2-NEXT: orl %ecx, %edx
; SSE2-NEXT: movl %edx, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: is_positive_mask_v32i8_v32i1:
; SSE42: # %bb.0:
; SSE42-NEXT: movq %rdi, %rax
; SSE42-NEXT: movd %esi, %xmm2
; SSE42-NEXT: pinsrb $1, %edx, %xmm2
; SSE42-NEXT: pinsrb $2, %ecx, %xmm2
; SSE42-NEXT: pinsrb $3, %r8d, %xmm2
; SSE42-NEXT: pinsrb $4, %r9d, %xmm2
; SSE42-NEXT: pinsrb $5, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $6, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $7, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $8, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $9, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $10, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $11, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $12, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $13, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $14, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE42-NEXT: pinsrb $1, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $2, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $3, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $4, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $5, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $6, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $7, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $8, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $9, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $10, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $11, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $12, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $13, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $14, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pinsrb $15, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
; SSE42-NEXT: pcmpgtb %xmm4, %xmm1
; SSE42-NEXT: pand %xmm3, %xmm1
; SSE42-NEXT: pcmpgtb %xmm4, %xmm0
; SSE42-NEXT: pand %xmm2, %xmm0
; SSE42-NEXT: psllw $7, %xmm0
; SSE42-NEXT: pmovmskb %xmm0, %ecx
; SSE42-NEXT: psllw $7, %xmm1
; SSE42-NEXT: pmovmskb %xmm1, %edx
; SSE42-NEXT: shll $16, %edx
; SSE42-NEXT: orl %ecx, %edx
; SSE42-NEXT: movl %edx, (%rdi)
; SSE42-NEXT: retq
;
; AVX1-LABEL: is_positive_mask_v32i8_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: is_positive_mask_v32i8_v32i1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQBW-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512DQBW: # %bb.0:
; AVX512DQBW-NEXT: vpsllw $7, %ymm1, %ymm1
; AVX512DQBW-NEXT: vpmovb2m %ymm1, %k1
; AVX512DQBW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT: vpmovm2b %k0, %ymm0
; AVX512DQBW-NEXT: retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <32 x i1> %y, %cmp
  ret <32 x i1> %and
}

; PR52504 - the icmp+select below computes smax(sext(%t3), -1): targets
; without a native 64-bit vector max lower it as pcmpgtq+por, while AVX512
; can use vpmaxsq directly.
define <4 x i64> @PR52504(<4 x i16> %t3) {
; SSE2-LABEL: PR52504:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm1
; SSE2-NEXT: por %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: PR52504:
; SSE42: # %bb.0:
; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovsxwq %xmm1, %xmm2
; SSE42-NEXT: pmovsxwq %xmm0, %xmm3
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pxor %xmm0, %xmm0
; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; SSE42-NEXT: por %xmm3, %xmm0
; SSE42-NEXT: pcmpgtq %xmm2, %xmm1
; SSE42-NEXT: por %xmm2, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: PR52504:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm3
; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR52504:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: PR52504:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %t14 = sext <4 x i16> %t3 to <4 x i64>
  %t15 = icmp sgt <4 x i64> %t14, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t16 = select <4 x i1> %t15, <4 x i64> %t14, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %t16
}