; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; This file checks x86 lowering of vector selects and blend-like shufflevectors
; with constant masks: pre-SSE4.1 targets fall back to shufps/and-andn-or
; sequences, while SSE4.1+/AVX should emit immediate-mask blend instructions.
; Do not edit the CHECK lines by hand; regenerate with the script named above.

; AVX128 tests:

define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,5,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = <255,255,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_4xi8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <255,255,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm0
; SSSE3-NEXT:    andnps %xmm1, %xmm2
; SSSE3-NEXT:    orps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_4xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm1
; SSSE3-NEXT:    andnps %xmm0, %xmm2
; SSSE3-NEXT:    orps %xmm1, %xmm2
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_8xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}


; AVX256 tests:

define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i328:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i328:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i648:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i648:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double4:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double4:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

; Variable-condition selects fed by an fcmp: these must lower to a compare
; plus blendv (or and/andn/or before SSE4.1), not an immediate blend.

define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmplepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testa:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testa:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmplepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testa:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testb:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testb:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testb:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out a blend has a constant mask, we should emit the
; blend instruction with an immediate mask
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT:    retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT:    movaps %xmm4, %xmm5
; SSE2-NEXT:    andnps %xmm0, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm3
; SSE2-NEXT:    andnps %xmm1, %xmm4
; SSE2-NEXT:    orps %xmm3, %xmm4
; SSE2-NEXT:    movaps %xmm5, %xmm0
; SSE2-NEXT:    movaps %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT:    pshufb %xmm5, %xmm2
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [18374686483949879295,18374686483949879295,18374686483949879295,18374686483949879295]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm3, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xi64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

; Sign-bit-driven and/andn/or patterns that should be recognized as blends
; keyed on the sign of %b (psrad $31 produces the all-ones/all-zeros mask).

define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm1
; SSSE3-NEXT:    pandn %xmm2, %xmm0
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm3
; SSSE3-NEXT:    pandn %xmm5, %xmm1
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    pand %xmm0, %xmm2
; SSSE3-NEXT:    pandn %xmm4, %xmm0
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm5, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %c, %0
  %2 = and <8 x i32> %a, %b.lobit
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm1
; SSE-NEXT:    psubd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE-LABEL: blend_neg_logic_v4i32_2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}