; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop | FileCheck %s --check-prefixes=CHECK,XOP

; BLENDV* looks only at the sign bit of each element of its condition vector.
; Whenever one of these tests lowers to BLENDV*, the sign-bit compare feeding
; the select is therefore redundant and should be removed.

; 128-bit vectors, covering every legal element type.

; <16 x i8>: take %x in lanes where %mask is negative, %y elsewhere.
define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
; CHECK-LABEL: signbit_sel_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq
  %is_neg = icmp slt <16 x i8> %mask, zeroinitializer
  %picked = select <16 x i1> %is_neg, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %picked
}

; 16-bit elements are the odd one out: the expected code for the next test
; still contains the compare in front of the blend.

; <8 x i16>: take %x in lanes where %mask is negative, %y elsewhere.
define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
; AVX-LABEL: signbit_sel_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; XOP-LABEL: signbit_sel_v8i16:
; XOP: # %bb.0:
; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3
; XOP-NEXT: vpcomltw %xmm3, %xmm2, %xmm2
; XOP-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
  %is_neg = icmp slt <8 x i16> %mask, zeroinitializer
  %picked = select <8 x i1> %is_neg, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %picked
}

; <4 x i32>: take %x in lanes where %mask is negative, %y elsewhere.
define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4i32:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v4i32:
; XOP: # %bb.0:
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
  %is_neg = icmp slt <4 x i32> %mask, zeroinitializer
  %picked = select <4 x i1> %is_neg, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %picked
}

; <2 x i64>: take %x in lanes where %mask is negative, %y elsewhere.
define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2i64:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v2i64:
; XOP: # %bb.0:
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
  %is_neg = icmp slt <2 x i64> %mask, zeroinitializer
  %picked = select <2 x i1> %is_neg, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %picked
}

; <4 x float> operands selected by the sign of an integer <4 x i32> mask.
define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v4f32:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v4f32:
; XOP: # %bb.0:
; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
  %is_neg = icmp slt <4 x i32> %mask, zeroinitializer
  %picked = select <4 x i1> %is_neg, <4 x float> %x, <4 x float> %y
  ret <4 x float> %picked
}

; <2 x double> operands selected by the sign of an integer <2 x i64> mask.
define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v2f64:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v2f64:
; XOP: # %bb.0:
; XOP-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; XOP-NEXT: retq
  %is_neg = icmp slt <2 x i64> %mask, zeroinitializer
  %picked = select <2 x i1> %is_neg, <2 x double> %x, <2 x double> %y
  ret <2 x double> %picked
}

; 256-bit vectors, which expose the differences between AVX1 and AVX2.

; <32 x i8>: take %x in lanes where %mask is negative, %y elsewhere.
define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
; AVX1-LABEL: signbit_sel_v32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: signbit_sel_v32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: signbit_sel_v32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; XOP-LABEL: signbit_sel_v32i8:
; XOP: # %bb.0:
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpcomltb %xmm4, %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <32 x i8> %mask, zeroinitializer
  %picked = select <32 x i1> %is_neg, <32 x i8> %x, <32 x i8> %y
  ret <32 x i8> %picked
}

; 16-bit elements again: the expected code for the next test keeps the
; compare in every configuration.

; <16 x i16>: take %x in lanes where %mask is negative, %y elsewhere.
define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
; AVX1-LABEL: signbit_sel_v16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: signbit_sel_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX2-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: signbit_sel_v16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
;
; XOP-LABEL: signbit_sel_v16i16:
; XOP: # %bb.0:
; XOP-NEXT: vextractf128 $1, %ymm2, %xmm3
; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4
; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3
; XOP-NEXT: vpcomltw %xmm4, %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <16 x i16> %mask, zeroinitializer
  %picked = select <16 x i1> %is_neg, <16 x i16> %x, <16 x i16> %y
  ret <16 x i16> %picked
}

; <8 x i32>: take %x in lanes where %mask is negative, %y elsewhere.
define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
; AVX12-LABEL: signbit_sel_v8i32:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v8i32:
; XOP: # %bb.0:
; XOP-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <8 x i32> %mask, zeroinitializer
  %picked = select <8 x i1> %is_neg, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %picked
}

; <4 x i64>: take %x in lanes where %mask is negative, %y elsewhere.
define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4i64:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v4i64:
; XOP: # %bb.0:
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <4 x i64> %mask, zeroinitializer
  %picked = select <4 x i1> %is_neg, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %picked
}

; <4 x double> operands selected by the sign of an integer <4 x i64> mask.
define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v4f64:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX12-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v4f64:
; XOP: # %bb.0:
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <4 x i64> %mask, zeroinitializer
  %picked = select <4 x i1> %is_neg, <4 x double> %x, <4 x double> %y
  ret <4 x double> %picked
}

; The condition's element type differs from that of the select operands.

; <4 x double> operands selected by the sign of a narrower <4 x i32> mask.
define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
; AVX1-LABEL: signbit_sel_v4f64_small_mask:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: signbit_sel_v4f64_small_mask:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxdq %xmm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1
; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT: vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
;
; XOP-LABEL: signbit_sel_v4f64_small_mask:
; XOP: # %bb.0:
; XOP-NEXT: vpmovsxdq %xmm2, %xmm3
; XOP-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
; XOP-NEXT: vpmovsxdq %xmm2, %xmm2
; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; XOP-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; XOP-NEXT: retq
  %is_neg = icmp slt <4 x i32> %mask, zeroinitializer
  %picked = select <4 x i1> %is_neg, <4 x double> %x, <4 x double> %y
  ret <4 x double> %picked
}

; A 512-bit vector, to confirm that AVX-512 lowering looks as expected.

; <8 x double> operands selected by the sign of an integer <8 x i64> mask.
define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
; AVX12-LABEL: signbit_sel_v8f64:
; AVX12: # %bb.0:
; AVX12-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; AVX12-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; AVX12-NEXT: retq
;
; AVX512-LABEL: signbit_sel_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vpcmpgtq %zmm2, %zmm3, %k1
; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
;
; XOP-LABEL: signbit_sel_v8f64:
; XOP: # %bb.0:
; XOP-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
; XOP-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
; XOP-NEXT: retq
  %is_neg = icmp slt <8 x i64> %mask, zeroinitializer
  %picked = select <8 x i1> %is_neg, <8 x double> %x, <8 x double> %y
  ret <8 x double> %picked
}

; With a floating-point compare as the condition:
; (1) Codegen must not crash.
; (2) FIXME: When signed zeros (and NaNs?) do not matter, the compare should still be folded away.
439 440define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { 441; AVX12-LABEL: signbit_sel_v4f32_fcmp: 442; AVX12: # %bb.0: 443; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2 444; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 445; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 446; AVX12-NEXT: retq 447; 448; AVX512F-LABEL: signbit_sel_v4f32_fcmp: 449; AVX512F: # %bb.0: 450; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 451; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 452; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 453; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1 454; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} 455; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 456; AVX512F-NEXT: vzeroupper 457; AVX512F-NEXT: retq 458; 459; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: 460; AVX512VL: # %bb.0: 461; AVX512VL-NEXT: vxorps %xmm2, %xmm2, %xmm2 462; AVX512VL-NEXT: vcmpltps %xmm2, %xmm0, %k1 463; AVX512VL-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} 464; AVX512VL-NEXT: retq 465; 466; XOP-LABEL: signbit_sel_v4f32_fcmp: 467; XOP: # %bb.0: 468; XOP-NEXT: vxorps %xmm2, %xmm2, %xmm2 469; XOP-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 470; XOP-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 471; XOP-NEXT: retq 472 %cmp = fcmp olt <4 x float> %x, zeroinitializer 473 %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y 474 ret <4 x float> %sel 475} 476 477define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { 478; AVX1-LABEL: blend_splat1_mask_cond_v4i64: 479; AVX1: # %bb.0: 480; AVX1-NEXT: vpsllq $63, %xmm0, %xmm3 481; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 482; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0 483; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 484; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 485; AVX1-NEXT: retq 486; 487; AVX2-LABEL: blend_splat1_mask_cond_v4i64: 488; AVX2: # %bb.0: 489; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0 490; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 491; 
AVX2-NEXT: retq 492; 493; AVX512F-LABEL: blend_splat1_mask_cond_v4i64: 494; AVX512F: # %bb.0: 495; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 496; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 497; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 498; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 499; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 500; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 501; AVX512F-NEXT: retq 502; 503; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64: 504; AVX512VL: # %bb.0: 505; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 506; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} 507; AVX512VL-NEXT: retq 508; 509; XOP-LABEL: blend_splat1_mask_cond_v4i64: 510; XOP: # %bb.0: 511; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 512; XOP-NEXT: vpsllq $63, %xmm3, %xmm3 513; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553] 514; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3 515; XOP-NEXT: vpsllq $63, %xmm0, %xmm0 516; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0 517; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 518; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 519; XOP-NEXT: retq 520 %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1> 521 %c = icmp eq <4 x i64> %a, zeroinitializer 522 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z 523 ret <4 x i64> %r 524} 525 526define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 527; AVX12-LABEL: blend_splat1_mask_cond_v4i32: 528; AVX12: # %bb.0: 529; AVX12-NEXT: vpslld $31, %xmm0, %xmm0 530; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 531; AVX12-NEXT: retq 532; 533; AVX512F-LABEL: blend_splat1_mask_cond_v4i32: 534; AVX512F: # %bb.0: 535; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 536; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 537; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 538; AVX512F-NEXT: vptestnmd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 539; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 540; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 541; AVX512F-NEXT: vzeroupper 542; AVX512F-NEXT: retq 543; 544; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32: 545; AVX512VL: # %bb.0: 546; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 547; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} 548; AVX512VL-NEXT: retq 549; 550; XOP-LABEL: blend_splat1_mask_cond_v4i32: 551; XOP: # %bb.0: 552; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 553; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 554; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0 555; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 556; XOP-NEXT: retq 557 %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> 558 %c = icmp eq <4 x i32> %a, zeroinitializer 559 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z 560 ret <4 x i32> %r 561} 562 563define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { 564; AVX1-LABEL: blend_splat1_mask_cond_v16i16: 565; AVX1: # %bb.0: 566; AVX1-NEXT: vpsllw $15, %xmm0, %xmm3 567; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3 568; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 569; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 570; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 571; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 572; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 573; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 574; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 575; AVX1-NEXT: retq 576; 577; AVX2-LABEL: blend_splat1_mask_cond_v16i16: 578; AVX2: # %bb.0: 579; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0 580; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 581; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 582; AVX2-NEXT: retq 583; 584; AVX512-LABEL: blend_splat1_mask_cond_v16i16: 585; AVX512: # %bb.0: 586; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 587; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 588; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 589; 
AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 590; AVX512-NEXT: retq 591; 592; XOP-LABEL: blend_splat1_mask_cond_v16i16: 593; XOP: # %bb.0: 594; XOP-NEXT: vpsllw $15, %xmm0, %xmm3 595; XOP-NEXT: vpsraw $15, %xmm3, %xmm3 596; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 597; XOP-NEXT: vpsllw $15, %xmm0, %xmm0 598; XOP-NEXT: vpsraw $15, %xmm0, %xmm0 599; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 600; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 601; XOP-NEXT: retq 602 %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 603 %c = icmp eq <16 x i16> %a, zeroinitializer 604 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z 605 ret <16 x i16> %r 606} 607 608define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { 609; AVX12-LABEL: blend_splat1_mask_cond_v16i8: 610; AVX12: # %bb.0: 611; AVX12-NEXT: vpsllw $7, %xmm0, %xmm0 612; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 613; AVX12-NEXT: retq 614; 615; AVX512-LABEL: blend_splat1_mask_cond_v16i8: 616; AVX512: # %bb.0: 617; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 618; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 619; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 620; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 621; AVX512-NEXT: retq 622; 623; XOP-LABEL: blend_splat1_mask_cond_v16i8: 624; XOP: # %bb.0: 625; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 626; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 627; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0 628; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 629; XOP-NEXT: retq 630 %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 631 %c = icmp eq <16 x i8> %a, zeroinitializer 632 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z 633 ret <16 x i8> %r 634} 635 636define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { 637; 
AVX12-LABEL: blend_splatmax_mask_cond_v2i64: 638; AVX12: # %bb.0: 639; AVX12-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 640; AVX12-NEXT: retq 641; 642; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64: 643; AVX512F: # %bb.0: 644; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 645; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 646; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 647; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 648; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 649; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 650; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 651; AVX512F-NEXT: vzeroupper 652; AVX512F-NEXT: retq 653; 654; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64: 655; AVX512VL: # %bb.0: 656; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1 657; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} 658; AVX512VL-NEXT: retq 659; 660; XOP-LABEL: blend_splatmax_mask_cond_v2i64: 661; XOP: # %bb.0: 662; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 663; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 664; XOP-NEXT: vpcomneqq %xmm3, %xmm0, %xmm0 665; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 666; XOP-NEXT: retq 667 %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808> 668 %c = icmp eq <2 x i64> %a, zeroinitializer 669 %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z 670 ret <2 x i64> %r 671} 672 673define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { 674; AVX12-LABEL: blend_splatmax_mask_cond_v8i32: 675; AVX12: # %bb.0: 676; AVX12-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 677; AVX12-NEXT: retq 678; 679; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32: 680; AVX512F: # %bb.0: 681; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 682; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 683; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 684; AVX512F-NEXT: vptestnmd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 685; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 686; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 687; AVX512F-NEXT: retq 688; 689; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32: 690; AVX512VL: # %bb.0: 691; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 692; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1} 693; AVX512VL-NEXT: retq 694; 695; XOP-LABEL: blend_splatmax_mask_cond_v8i32: 696; XOP: # %bb.0: 697; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 698; XOP-NEXT: retq 699 %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> 700 %c = icmp eq <8 x i32> %a, zeroinitializer 701 %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z 702 ret <8 x i32> %r 703} 704 705define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { 706; AVX12-LABEL: blend_splatmax_mask_cond_v8i16: 707; AVX12: # %bb.0: 708; AVX12-NEXT: vpsraw $15, %xmm0, %xmm0 709; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 710; AVX12-NEXT: retq 711; 712; AVX512-LABEL: blend_splatmax_mask_cond_v8i16: 713; AVX512: # %bb.0: 714; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 715; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 716; AVX512-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 717; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 718; AVX512-NEXT: retq 719; 720; XOP-LABEL: blend_splatmax_mask_cond_v8i16: 721; XOP: # %bb.0: 722; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 723; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 724; XOP-NEXT: vpcomneqw %xmm3, %xmm0, %xmm0 725; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 726; XOP-NEXT: retq 727 %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768> 728 %c = icmp eq <8 x i16> %a, zeroinitializer 729 %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z 730 ret <8 x 
i16> %r 731} 732 733define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { 734; AVX1-LABEL: blend_splatmax_mask_cond_v32i8: 735; AVX1: # %bb.0: 736; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 737; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 738; AVX1-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 739; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 740; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 741; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 742; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 743; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 744; AVX1-NEXT: retq 745; 746; AVX2-LABEL: blend_splatmax_mask_cond_v32i8: 747; AVX2: # %bb.0: 748; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 749; AVX2-NEXT: retq 750; 751; AVX512-LABEL: blend_splatmax_mask_cond_v32i8: 752; AVX512: # %bb.0: 753; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 754; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 755; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 756; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 757; AVX512-NEXT: retq 758; 759; XOP-LABEL: blend_splatmax_mask_cond_v32i8: 760; XOP: # %bb.0: 761; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 762; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 763; XOP-NEXT: vpcmpgtb %xmm3, %xmm4, %xmm3 764; XOP-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 765; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 766; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 767; XOP-NEXT: retq 768 %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128> 769 %c = icmp eq <32 x i8> %a, zeroinitializer 770 %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z 771 ret <32 x i8> %r 772} 773 774define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { 775; AVX1-LABEL: blend_splat_mask_cond_v4i64: 776; AVX1: # %bb.0: 777; AVX1-NEXT: vpsllq $62, 
%xmm0, %xmm3 778; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 779; AVX1-NEXT: vpsllq $62, %xmm0, %xmm0 780; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 781; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 782; AVX1-NEXT: retq 783; 784; AVX2-LABEL: blend_splat_mask_cond_v4i64: 785; AVX2: # %bb.0: 786; AVX2-NEXT: vpsllq $62, %ymm0, %ymm0 787; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 788; AVX2-NEXT: retq 789; 790; AVX512F-LABEL: blend_splat_mask_cond_v4i64: 791; AVX512F: # %bb.0: 792; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 793; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 794; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 795; AVX512F-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1 796; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 797; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 798; AVX512F-NEXT: retq 799; 800; AVX512VL-LABEL: blend_splat_mask_cond_v4i64: 801; AVX512VL: # %bb.0: 802; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 803; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} 804; AVX512VL-NEXT: retq 805; 806; XOP-LABEL: blend_splat_mask_cond_v4i64: 807; XOP: # %bb.0: 808; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 809; XOP-NEXT: vpsllq $62, %xmm3, %xmm3 810; XOP-NEXT: vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553] 811; XOP-NEXT: vpshaq %xmm4, %xmm3, %xmm3 812; XOP-NEXT: vpsllq $62, %xmm0, %xmm0 813; XOP-NEXT: vpshaq %xmm4, %xmm0, %xmm0 814; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 815; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 816; XOP-NEXT: retq 817 %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2> 818 %c = icmp eq <4 x i64> %a, zeroinitializer 819 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z 820 ret <4 x i64> %r 821} 822 823define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 824; AVX12-LABEL: blend_splat_mask_cond_v4i32: 825; AVX12: # %bb.0: 826; AVX12-NEXT: vpslld $15, %xmm0, 
%xmm0 827; AVX12-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 828; AVX12-NEXT: retq 829; 830; AVX512F-LABEL: blend_splat_mask_cond_v4i32: 831; AVX512F: # %bb.0: 832; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 833; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 834; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 835; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1 836; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 837; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 838; AVX512F-NEXT: vzeroupper 839; AVX512F-NEXT: retq 840; 841; AVX512VL-LABEL: blend_splat_mask_cond_v4i32: 842; AVX512VL: # %bb.0: 843; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1 844; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} 845; AVX512VL-NEXT: retq 846; 847; XOP-LABEL: blend_splat_mask_cond_v4i32: 848; XOP: # %bb.0: 849; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 850; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 851; XOP-NEXT: vpcomneqd %xmm3, %xmm0, %xmm0 852; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 853; XOP-NEXT: retq 854 %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536> 855 %c = icmp eq <4 x i32> %a, zeroinitializer 856 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z 857 ret <4 x i32> %r 858} 859 860define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { 861; AVX1-LABEL: blend_splat_mask_cond_v16i16: 862; AVX1: # %bb.0: 863; AVX1-NEXT: vpsllw $5, %xmm0, %xmm3 864; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3 865; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 866; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0 867; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 868; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 869; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1 870; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0 871; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 872; AVX1-NEXT: retq 873; 874; AVX2-LABEL: blend_splat_mask_cond_v16i16: 875; AVX2: # %bb.0: 876; AVX2-NEXT: vpsllw 
$5, %ymm0, %ymm0 877; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 878; AVX2-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm0 879; AVX2-NEXT: retq 880; 881; AVX512-LABEL: blend_splat_mask_cond_v16i16: 882; AVX512: # %bb.0: 883; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 884; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 885; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 886; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 887; AVX512-NEXT: retq 888; 889; XOP-LABEL: blend_splat_mask_cond_v16i16: 890; XOP: # %bb.0: 891; XOP-NEXT: vpsllw $5, %xmm0, %xmm3 892; XOP-NEXT: vpsraw $15, %xmm3, %xmm3 893; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 894; XOP-NEXT: vpsllw $5, %xmm0, %xmm0 895; XOP-NEXT: vpsraw $15, %xmm0, %xmm0 896; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 897; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 898; XOP-NEXT: retq 899 %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024> 900 %c = icmp eq <16 x i16> %a, zeroinitializer 901 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z 902 ret <16 x i16> %r 903} 904 905define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { 906; AVX12-LABEL: blend_splat_mask_cond_v16i8: 907; AVX12: # %bb.0: 908; AVX12-NEXT: vpsllw $5, %xmm0, %xmm0 909; AVX12-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 910; AVX12-NEXT: retq 911; 912; AVX512-LABEL: blend_splat_mask_cond_v16i8: 913; AVX512: # %bb.0: 914; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 915; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 916; AVX512-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 917; AVX512-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 918; AVX512-NEXT: retq 919; 920; XOP-LABEL: blend_splat_mask_cond_v16i8: 921; XOP: # %bb.0: 922; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 923; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 924; XOP-NEXT: vpcomneqb %xmm3, %xmm0, %xmm0 925; XOP-NEXT: 
vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 926; XOP-NEXT: retq 927 %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> 928 %c = icmp eq <16 x i8> %a, zeroinitializer 929 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z 930 ret <16 x i8> %r 931} 932 933define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { 934; AVX1-LABEL: blend_mask_cond_v2i64: 935; AVX1: # %bb.0: 936; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 937; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 938; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0 939; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 940; AVX1-NEXT: retq 941; 942; AVX2-LABEL: blend_mask_cond_v2i64: 943; AVX2: # %bb.0: 944; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 945; AVX2-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 946; AVX2-NEXT: retq 947; 948; AVX512F-LABEL: blend_mask_cond_v2i64: 949; AVX512F: # %bb.0: 950; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 951; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 952; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 953; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4] 954; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 955; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 956; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 957; AVX512F-NEXT: vzeroupper 958; AVX512F-NEXT: retq 959; 960; AVX512VL-LABEL: blend_mask_cond_v2i64: 961; AVX512VL: # %bb.0: 962; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 963; AVX512VL-NEXT: vpblendmq %xmm1, %xmm2, %xmm0 {%k1} 964; AVX512VL-NEXT: retq 965; 966; XOP-LABEL: blend_mask_cond_v2i64: 967; XOP: # %bb.0: 968; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 969; XOP-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 970; XOP-NEXT: retq 971 %a = and <2 x i64> %x, <i64 1, i64 4> 972 %c = icmp eq <2 x i64> %a, zeroinitializer 973 %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z 974 ret 
<2 x i64> %r 975} 976 977define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 978; AVX1-LABEL: blend_mask_cond_v4i32: 979; AVX1: # %bb.0: 980; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 981; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 982; AVX1-NEXT: retq 983; 984; AVX2-LABEL: blend_mask_cond_v4i32: 985; AVX2: # %bb.0: 986; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 987; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 988; AVX2-NEXT: retq 989; 990; AVX512F-LABEL: blend_mask_cond_v4i32: 991; AVX512F: # %bb.0: 992; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 993; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 994; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 995; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [65536,512,2,1] 996; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 997; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 998; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 999; AVX512F-NEXT: vzeroupper 1000; AVX512F-NEXT: retq 1001; 1002; AVX512VL-LABEL: blend_mask_cond_v4i32: 1003; AVX512VL: # %bb.0: 1004; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 1005; AVX512VL-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1} 1006; AVX512VL-NEXT: retq 1007; 1008; XOP-LABEL: blend_mask_cond_v4i32: 1009; XOP: # %bb.0: 1010; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1011; XOP-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1012; XOP-NEXT: retq 1013 %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1> 1014 %c = icmp eq <4 x i32> %a, zeroinitializer 1015 %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z 1016 ret <4 x i32> %r 1017} 1018 1019define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) { 1020; AVX-LABEL: blend_mask_cond_v8i16: 1021; AVX: # %bb.0: 1022; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1023; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 1024; AVX-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0 1025; 
AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 1026; AVX-NEXT: retq 1027; 1028; XOP-LABEL: blend_mask_cond_v8i16: 1029; XOP: # %bb.0: 1030; XOP-NEXT: vpxor %xmm3, %xmm3, %xmm3 1031; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1032; XOP-NEXT: vpcomltw %xmm3, %xmm0, %xmm0 1033; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 1034; XOP-NEXT: retq 1035 %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096> 1036 %c = icmp eq <8 x i16> %a, zeroinitializer 1037 %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z 1038 ret <8 x i16> %r 1039} 1040 1041define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) { 1042; AVX-LABEL: blend_mask_cond_v16i8: 1043; AVX: # %bb.0: 1044; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1045; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3 1046; AVX-NEXT: vpcmpeqb %xmm3, %xmm0, %xmm0 1047; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm2, %xmm0 1048; AVX-NEXT: retq 1049; 1050; XOP-LABEL: blend_mask_cond_v16i8: 1051; XOP: # %bb.0: 1052; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1053; XOP-NEXT: vpblendvb %xmm0, %xmm2, %xmm1, %xmm0 1054; XOP-NEXT: retq 1055 %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2> 1056 %c = icmp eq <16 x i8> %a, zeroinitializer 1057 %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z 1058 ret <16 x i8> %r 1059} 1060 1061define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) { 1062; AVX1-LABEL: blend_mask_cond_v4i64: 1063; AVX1: # %bb.0: 1064; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1065; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1066; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1067; AVX1-NEXT: vpcmpeqq %xmm4, %xmm3, %xmm3 1068; AVX1-NEXT: vpcmpeqq %xmm4, %xmm0, %xmm0 1069; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1070; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 1071; AVX1-NEXT: retq 1072; 1073; AVX2-LABEL: 
blend_mask_cond_v4i64: 1074; AVX2: # %bb.0: 1075; AVX2-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1076; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 1077; AVX2-NEXT: retq 1078; 1079; AVX512F-LABEL: blend_mask_cond_v4i64: 1080; AVX512F: # %bb.0: 1081; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1082; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1083; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1084; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [2,4,32768,1] 1085; AVX512F-NEXT: vptestnmq %zmm3, %zmm0, %k1 1086; AVX512F-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} 1087; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1088; AVX512F-NEXT: retq 1089; 1090; AVX512VL-LABEL: blend_mask_cond_v4i64: 1091; AVX512VL: # %bb.0: 1092; AVX512VL-NEXT: vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 1093; AVX512VL-NEXT: vpblendmq %ymm1, %ymm2, %ymm0 {%k1} 1094; AVX512VL-NEXT: retq 1095; 1096; XOP-LABEL: blend_mask_cond_v4i64: 1097; XOP: # %bb.0: 1098; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1099; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 1100; XOP-NEXT: vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1101; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1102; XOP-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 1103; XOP-NEXT: retq 1104 %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1> 1105 %c = icmp eq <4 x i64> %a, zeroinitializer 1106 %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z 1107 ret <4 x i64> %r 1108} 1109 1110define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) { 1111; AVX1-LABEL: blend_mask_cond_v8i32: 1112; AVX1: # %bb.0: 1113; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1114; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1115; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1116; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1117; AVX1-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1118; AVX1-NEXT: retq 1119; 1120; AVX2-LABEL: 
blend_mask_cond_v8i32: 1121; AVX2: # %bb.0: 1122; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1123; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1124; AVX2-NEXT: retq 1125; 1126; AVX512F-LABEL: blend_mask_cond_v8i32: 1127; AVX512F: # %bb.0: 1128; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1129; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1130; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1131; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096] 1132; AVX512F-NEXT: vptestnmd %zmm3, %zmm0, %k1 1133; AVX512F-NEXT: vpblendmd %zmm1, %zmm2, %zmm0 {%k1} 1134; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1135; AVX512F-NEXT: retq 1136; 1137; AVX512VL-LABEL: blend_mask_cond_v8i32: 1138; AVX512VL: # %bb.0: 1139; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 1140; AVX512VL-NEXT: vpblendmd %ymm1, %ymm2, %ymm0 {%k1} 1141; AVX512VL-NEXT: retq 1142; 1143; XOP-LABEL: blend_mask_cond_v8i32: 1144; XOP: # %bb.0: 1145; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 1146; XOP-NEXT: vextractf128 $1, %ymm0, %xmm0 1147; XOP-NEXT: vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1148; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 1149; XOP-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 1150; XOP-NEXT: retq 1151 %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096> 1152 %c = icmp eq <8 x i32> %a, zeroinitializer 1153 %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z 1154 ret <8 x i32> %r 1155} 1156 1157define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { 1158; AVX1-LABEL: blend_mask_cond_v16i16: 1159; AVX1: # %bb.0: 1160; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1161; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1162; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1163; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3 1164; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0 1165; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, 
%ymm0 1166; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2 1167; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1168; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 1169; AVX1-NEXT: retq 1170; 1171; AVX2-LABEL: blend_mask_cond_v16i16: 1172; AVX2: # %bb.0: 1173; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1174; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 1175; AVX2-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 1176; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1177; AVX2-NEXT: retq 1178; 1179; AVX512-LABEL: blend_mask_cond_v16i16: 1180; AVX512: # %bb.0: 1181; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1182; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 1183; AVX512-NEXT: vpcmpeqw %ymm3, %ymm0, %ymm0 1184; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1185; AVX512-NEXT: retq 1186; 1187; XOP-LABEL: blend_mask_cond_v16i16: 1188; XOP: # %bb.0: 1189; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 1190; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1191; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1192; XOP-NEXT: vpcomltw %xmm4, %xmm3, %xmm3 1193; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1194; XOP-NEXT: vpcomltw %xmm4, %xmm0, %xmm0 1195; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1196; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 1197; XOP-NEXT: retq 1198 %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024> 1199 %c = icmp eq <16 x i16> %a, zeroinitializer 1200 %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z 1201 ret <16 x i16> %r 1202} 1203 1204define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { 1205; AVX1-LABEL: blend_mask_cond_v32i8: 1206; AVX1: # %bb.0: 1207; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1208; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1209; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 1210; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3 1211; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0 1212; AVX1-NEXT: 
vinsertf128 $1, %xmm3, %ymm0, %ymm0 1213; AVX1-NEXT: vandnps %ymm2, %ymm0, %ymm2 1214; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0 1215; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 1216; AVX1-NEXT: retq 1217; 1218; AVX2-LABEL: blend_mask_cond_v32i8: 1219; AVX2: # %bb.0: 1220; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1221; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 1222; AVX2-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 1223; AVX2-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1224; AVX2-NEXT: retq 1225; 1226; AVX512-LABEL: blend_mask_cond_v32i8: 1227; AVX512: # %bb.0: 1228; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1229; AVX512-NEXT: vpxor %xmm3, %xmm3, %xmm3 1230; AVX512-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 1231; AVX512-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 1232; AVX512-NEXT: retq 1233; 1234; XOP-LABEL: blend_mask_cond_v32i8: 1235; XOP: # %bb.0: 1236; XOP-NEXT: vextractf128 $1, %ymm0, %xmm3 1237; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 1238; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1239; XOP-NEXT: vpcomltb %xmm4, %xmm3, %xmm3 1240; XOP-NEXT: vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1241; XOP-NEXT: vpcomltb %xmm4, %xmm0, %xmm0 1242; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1243; XOP-NEXT: vpcmov %ymm0, %ymm1, %ymm2, %ymm0 1244; XOP-NEXT: retq 1245 %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16> 1246 %c = icmp eq <32 x i8> %a, zeroinitializer 1247 %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z 1248 ret <32 x i8> %r 1249} 1250 1251define void @PR46531(ptr %x, ptr %y, ptr %z) { 1252; AVX12-LABEL: PR46531: 1253; AVX12: # %bb.0: 1254; AVX12-NEXT: vmovdqu (%rsi), %xmm0 1255; AVX12-NEXT: vmovdqu (%rdx), %xmm1 1256; AVX12-NEXT: vpor %xmm0, %xmm1, %xmm2 1257; AVX12-NEXT: vpxor %xmm0, %xmm1, %xmm0 1258; AVX12-NEXT: vpslld $31, %xmm1, %xmm1 1259; 
AVX12-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 1260; AVX12-NEXT: vmovups %xmm0, (%rdi) 1261; AVX12-NEXT: retq 1262; 1263; AVX512F-LABEL: PR46531: 1264; AVX512F: # %bb.0: 1265; AVX512F-NEXT: vmovdqu (%rsi), %xmm0 1266; AVX512F-NEXT: vmovdqu (%rdx), %xmm1 1267; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm2 1268; AVX512F-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1 1269; AVX512F-NEXT: vpxor %xmm0, %xmm1, %xmm0 1270; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} 1271; AVX512F-NEXT: vmovdqu %xmm0, (%rdi) 1272; AVX512F-NEXT: vzeroupper 1273; AVX512F-NEXT: retq 1274; 1275; AVX512VL-LABEL: PR46531: 1276; AVX512VL: # %bb.0: 1277; AVX512VL-NEXT: vmovdqu (%rsi), %xmm0 1278; AVX512VL-NEXT: vmovdqu (%rdx), %xmm1 1279; AVX512VL-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1 1280; AVX512VL-NEXT: vpxor %xmm0, %xmm1, %xmm2 1281; AVX512VL-NEXT: vpord %xmm0, %xmm1, %xmm2 {%k1} 1282; AVX512VL-NEXT: vmovdqu %xmm2, (%rdi) 1283; AVX512VL-NEXT: retq 1284; 1285; XOP-LABEL: PR46531: 1286; XOP: # %bb.0: 1287; XOP-NEXT: vmovdqu (%rsi), %xmm0 1288; XOP-NEXT: vmovdqu (%rdx), %xmm1 1289; XOP-NEXT: vpor %xmm0, %xmm1, %xmm2 1290; XOP-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3 1291; XOP-NEXT: vpxor %xmm4, %xmm4, %xmm4 1292; XOP-NEXT: vpcomneqd %xmm4, %xmm3, %xmm3 1293; XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0 1294; XOP-NEXT: vblendvps %xmm3, %xmm0, %xmm2, %xmm0 1295; XOP-NEXT: vmovups %xmm0, (%rdi) 1296; XOP-NEXT: retq 1297 %a = load <4 x i32>, ptr %y, align 4 1298 %b = load <4 x i32>, ptr %z, align 4 1299 %or = or <4 x i32> %b, %a 1300 %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1> 1301 %cmp = icmp eq <4 x i32> %and, zeroinitializer 1302 %xor = xor <4 x i32> %b, %a 1303 %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor 1304 store <4 x i32> %sel, ptr %x, align 4 1305 ret void 1306} 1307 1308attributes #0 = { "no-nans-fp-math"="true" } 1309