; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL

; Every test in this part of the file feeds a vector select between a variable
; operand (%y) and a splat constant into a binary operator whose other operand
; is %x. The constants used are -0.0 for fadd, zeroinitializer for fsub and 1.0
; for fmul/fdiv. Naming used by the tests:
;   *_commute   - the select is operand 0 of the binary operator instead of operand 1
;   *_swap      - the constant is the true arm of the select instead of the false arm
;   *_cast_cond - the select condition is an i8 argument bitcast to <8 x i1>
; The check lines record, per llc configuration, whether the select is emitted
; as part of a masked arithmetic instruction or as a separate blend / masked
; move next to an unmasked arithmetic instruction. They are generated by the
; script named in the first line; regenerate them rather than editing by hand.

; fadd %x, (select %b, %y, splat -0.0). The AVX512VL checks expect one masked
; vaddps; the AVX2 and AVX512F checks expect the -0.0 splat to be blended with
; %y before an unmasked vaddps.
define <4 x float> @fadd_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
; AVX2-LABEL: fadd_v4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fadd_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fadd_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vaddps %xmm2, %xmm1, %xmm1 {%k1}
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>
  %r = fadd <4 x float> %x, %s
  ret <4 x float> %r
}

; Same select as fadd_v4f32 but 8 lanes wide and used as operand 0 of the fadd.
; The AVX512VL checks still expect one masked vaddps.
define <8 x float> @fadd_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fadd_v8f32_commute:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm3, %ymm0
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fadd_v8f32_commute:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fadd_v8f32_commute:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vaddps %ymm2, %ymm1, %ymm1 {%k1}
; AVX512VL-NEXT: vmovaps %ymm1, %ymm0
; AVX512VL-NEXT: retq
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>
  %r = fadd <8 x float> %s, %x
  ret <8 x float> %r
}

; fadd %x, (select %b, splat -0.0, %y): the constant is the true arm. The
; AVX512 checks expect an unmasked vaddps followed by a masked vmovaps of the
; %x register over the lanes where %b is set.
define <16 x float> @fadd_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fadd_v16f32_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vaddps %ymm3, %ymm1, %ymm0
; AVX2-NEXT: vaddps %ymm4, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fadd_v16f32_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vaddps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, <16 x float> %y
  %r = fadd <16 x float> %x, %s
  ret <16 x float> %r
}

; Same as fadd_v16f32_swap but with the select as operand 0 of the fadd. The
; recorded AVX512 sequence is identical to the one for fadd_v16f32_swap.
define <16 x float> @fadd_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fadd_v16f32_commute_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vaddps %ymm1, %ymm3, %ymm0
; AVX2-NEXT: vaddps %ymm2, %ymm4, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fadd_v16f32_commute_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vaddps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, <16 x float> %y
  %r = fadd <16 x float> %s, %x
  ret <16 x float> %r
}

; fsub %x, (select %b, %y, zeroinitializer). The AVX512VL checks expect one
; masked vsubps; the AVX2 checks expect %y to be ANDed with the sign-extended
; mask and the AVX512F checks a zero-masked vmovaps, each before an unmasked
; vsubps.
define <4 x float> @fsub_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
; AVX2-LABEL: fsub_v4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vsubps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fsub_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT: vsubps %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fsub_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vsubps %xmm2, %xmm1, %xmm1 {%k1}
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> zeroinitializer
  %r = fsub <4 x float> %x, %s
  ret <4 x float> %r
}

; negative test - fsub is not commutative; there is no identity constant for operand 0

; The select is operand 0 (the minuend) of the fsub. Every configuration is
; expected to materialize the select separately and then emit an unmasked
; vsubps.
define <8 x float> @fsub_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fsub_v8f32_commute:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fsub_v8f32_commute:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fsub_v8f32_commute:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vmovaps %ymm2, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> zeroinitializer
  %r = fsub <8 x float> %s, %x
  ret <8 x float> %r
}

; fsub %x, (select %b, zeroinitializer, %y): zero is the true arm. The AVX512
; checks expect an unmasked vsubps followed by a masked vmovaps of the %x
; register; the AVX2 checks clear the selected lanes of %y with vpandn.
define <16 x float> @fsub_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fsub_v16f32_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vsubps %ymm4, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fsub_v16f32_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vsubps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> zeroinitializer, <16 x float> %y
  %r = fsub <16 x float> %x, %s
  ret <16 x float> %r
}

; negative test - fsub is not commutative; there is no identity constant for operand 0

; Swapped select used as operand 0 of the fsub. The AVX512 checks expect the
; inverted mask (vptestnmd), a zero-masked vmovaps of %y and then an unmasked
; vsubps.
define <16 x float> @fsub_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fsub_v16f32_commute_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vsubps %ymm2, %ymm4, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fsub_v16f32_commute_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmovaps %zmm2, %zmm0 {%k1} {z}
; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> zeroinitializer, <16 x float> %y
  %r = fsub <16 x float> %s, %x
  ret <16 x float> %r
}

; fmul %x, (select %b, %y, splat 1.0). The AVX512VL checks expect one masked
; vmulps; the AVX2 and AVX512F checks expect the 1.0 splat to be blended with
; %y before an unmasked vmulps.
define <4 x float> @fmul_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
; AVX2-LABEL: fmul_v4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; AVX2-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fmul_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fmul_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vmulps %xmm2, %xmm1, %xmm1 {%k1}
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fmul <4 x float> %x, %s
  ret <4 x float> %r
}

; Same select as fmul_v4f32 but 8 lanes wide and used as operand 0 of the fmul.
; The AVX512VL checks still expect one masked vmulps.
define <8 x float> @fmul_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fmul_v8f32_commute:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm3, %ymm0
; AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fmul_v8f32_commute:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fmul_v8f32_commute:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vmulps %ymm2, %ymm1, %ymm1 {%k1}
; AVX512VL-NEXT: vmovaps %ymm1, %ymm0
; AVX512VL-NEXT: retq
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fmul <8 x float> %s, %x
  ret <8 x float> %r
}

; fmul %x, (select %b, splat 1.0, %y): the constant is the true arm. The AVX512
; checks expect an unmasked vmulps followed by a masked vmovaps of the %x
; register.
define <16 x float> @fmul_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fmul_v16f32_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vmulps %ymm3, %ymm1, %ymm0
; AVX2-NEXT: vmulps %ymm4, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fmul_v16f32_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmulps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
  %r = fmul <16 x float> %x, %s
  ret <16 x float> %r
}

; Same as fmul_v16f32_swap but with the select as operand 0 of the fmul. The
; recorded AVX512 sequence is identical to the one for fmul_v16f32_swap.
define <16 x float> @fmul_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fmul_v16f32_commute_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vmulps %ymm1, %ymm3, %ymm0
; AVX2-NEXT: vmulps %ymm2, %ymm4, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fmul_v16f32_commute_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vmulps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
  %r = fmul <16 x float> %s, %x
  ret <16 x float> %r
}

; fdiv %x, (select %b, %y, splat 1.0): the select is the divisor. The AVX512VL
; checks expect one masked vdivps; the AVX2 and AVX512F checks expect the 1.0
; splat to be blended with %y before an unmasked vdivps.
define <4 x float> @fdiv_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
; AVX2-LABEL: fdiv_v4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; AVX2-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fdiv_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vdivps %xmm0, %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fdiv_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT: vdivps %xmm2, %xmm1, %xmm1 {%k1}
; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
; AVX512VL-NEXT: retq
  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fdiv <4 x float> %x, %s
  ret <4 x float> %r
}

; The select is operand 0 (the dividend) of the fdiv. No configuration is
; expected to emit a masked vdivps here: each one materializes the select over
; a 1.0 splat and then emits an unmasked vdivps.
define <8 x float> @fdiv_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fdiv_v8f32_commute:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm3, %ymm0
; AVX2-NEXT: vdivps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fdiv_v8f32_commute:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm2, %zmm0 {%k1}
; AVX512F-NEXT: vdivps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fdiv_v8f32_commute:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512VL-NEXT: vmovaps %ymm2, %ymm0 {%k1}
; AVX512VL-NEXT: vdivps %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fdiv <8 x float> %s, %x
  ret <8 x float> %r
}

; fdiv %x, (select %b, splat 1.0, %y): the constant is the true arm and the
; select is the divisor. The AVX512 checks expect an unmasked vdivps followed
; by a masked vmovaps of the %x register.
define <16 x float> @fdiv_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fdiv_v16f32_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vdivps %ymm3, %ymm1, %ymm0
; AVX2-NEXT: vdivps %ymm4, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fdiv_v16f32_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vdivps %zmm2, %zmm1, %zmm0
; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
  %r = fdiv <16 x float> %x, %s
  ret <16 x float> %r
}

; Swapped select used as operand 0 (the dividend) of the fdiv. The AVX512
; checks expect the 1.0 constant to be broadcast from the constant pool into
; the %y register under the mask, followed by an unmasked vdivps.
define <16 x float> @fdiv_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
; AVX2-LABEL: fdiv_v16f32_commute_swap:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT: vpslld $31, %ymm5, %ymm5
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm5, %ymm6, %ymm3, %ymm3
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm0, %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vdivps %ymm1, %ymm3, %ymm0
; AVX2-NEXT: vdivps %ymm2, %ymm4, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fdiv_v16f32_commute_swap:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT: vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2 {%k1}
; AVX512-NEXT: vdivps %zmm1, %zmm2, %zmm0
; AVX512-NEXT: retq
  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
  %r = fdiv <16 x float> %s, %x
  ret <16 x float> %r
}

; fadd with the -0.0 select, where the <8 x i1> condition is a bitcast of the
; i8 argument %pb. Both AVX512 configurations move %edi straight into %k1;
; AVX512VL then expects one masked vaddps. The AVX2 checks rebuild a vector
; mask from the scalar with a broadcast, vpand and vpcmpeqd.
define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fadd_v8f32_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fadd_v8f32_cast_cond:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vmovaps %zmm1, %zmm2 {%k1}
; AVX512F-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fadd_v8f32_cast_cond:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>
  %r = fadd <8 x float> %x, %s
  ret <8 x float> %r
}

; <8 x double> variant of fadd_v8f32_cast_cond. Both AVX512 configurations
; share one expectation: kmovw followed by a single masked vaddpd. The AVX2
; checks handle the vector as two 4-lane halves with separate bit masks.
define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fadd_v8f64_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm4
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fadd_v8f64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>
  %r = fadd <8 x double> %x, %s
  ret <8 x double> %r
}

; fsub with the zeroinitializer select and a bitcast i8 condition. AVX512VL
; expects kmovw plus one masked vsubps; AVX512F expects a zero-masked vmovaps
; of %y before an unmasked vsubps.
define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fsub_v8f32_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fsub_v8f32_cast_cond:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vmovaps %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vsubps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fsub_v8f32_cast_cond:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> zeroinitializer
  %r = fsub <8 x float> %x, %s
  ret <8 x float> %r
}

; <8 x double> variant of fsub_v8f32_cast_cond. Both AVX512 configurations
; expect kmovw followed by a single masked vsubpd.
define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fsub_v8f64_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm4
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vsubpd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fsub_v8f64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> zeroinitializer
  %r = fsub <8 x double> %x, %s
  ret <8 x double> %r
}

; fmul with the 1.0 select and a bitcast i8 condition. AVX512VL expects kmovw
; plus one masked vmulps; AVX512F expects a masked vmovaps of %y over a 1.0
; splat before an unmasked vmulps.
define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fmul_v8f32_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fmul_v8f32_cast_cond:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm1, %zmm2 {%k1}
; AVX512F-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fmul_v8f32_cast_cond:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fmul <8 x float> %x, %s
  ret <8 x float> %r
}

; <8 x double> variant of fmul_v8f32_cast_cond. Both AVX512 configurations
; expect kmovw followed by a single masked vmulpd.
define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fmul_v8f64_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm4
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fmul_v8f64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %r = fmul <8 x double> %x, %s
  ret <8 x double> %r
}

; fdiv with the 1.0 select as divisor and a bitcast i8 condition. AVX512VL
; expects kmovw plus one masked vdivps; AVX512F expects a masked vmovaps of %y
; over a 1.0 splat before an unmasked vdivps.
define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
; AVX2-LABEL: fdiv_v8f32_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
; AVX2-NEXT: vdivps %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fdiv_v8f32_cast_cond:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512F-NEXT: vmovaps %zmm1, %zmm2 {%k1}
; AVX512F-NEXT: vdivps %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fdiv_v8f32_cast_cond:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: kmovw %edi, %k1
; AVX512VL-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %r = fdiv <8 x float> %x, %s
  ret <8 x float> %r
}

; <8 x double> variant of fdiv_v8f32_cast_cond. Both AVX512 configurations
; expect kmovw followed by a single masked vdivpd.
define <8 x double> @fdiv_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
; AVX2-LABEL: fdiv_v8f64_cast_cond:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm4
; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
; AVX2-NEXT: vdivpd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vdivpd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: fdiv_v8f64_cast_cond:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovw %edi, %k1
; AVX512-NEXT: vdivpd %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT: retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %r = fdiv <8 x double> %x, %s
  ret <8 x double> %r
}

define <4 x i32> @add_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
; AVX2-LABEL: add_v4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: add_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
;
AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 765; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 766; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 767; AVX512F-NEXT: vpaddd %xmm0, %xmm1, %xmm0 768; AVX512F-NEXT: vzeroupper 769; AVX512F-NEXT: retq 770; 771; AVX512VL-LABEL: add_v4i32: 772; AVX512VL: # %bb.0: 773; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 774; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 775; AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1 {%k1} 776; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 777; AVX512VL-NEXT: retq 778 %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer 779 %r = add <4 x i32> %x, %s 780 ret <4 x i32> %r 781} 782 783define <8 x i32> @add_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) { 784; AVX2-LABEL: add_v8i32_commute: 785; AVX2: # %bb.0: 786; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 787; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 788; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 789; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 790; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 791; AVX2-NEXT: retq 792; 793; AVX512F-LABEL: add_v8i32_commute: 794; AVX512F: # %bb.0: 795; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 796; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 797; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 798; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 799; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 800; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0 801; AVX512F-NEXT: retq 802; 803; AVX512VL-LABEL: add_v8i32_commute: 804; AVX512VL: # %bb.0: 805; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 806; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 807; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 808; AVX512VL-NEXT: vpaddd %ymm2, %ymm1, %ymm1 {%k1} 809; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0 810; AVX512VL-NEXT: retq 811 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 812 %r = add <8 x i32> %s, %x 813 ret <8 x i32> %r 814} 815 816define <8 x i32> 
@add_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) { 817; AVX2-LABEL: add_v8i32_cast_cond: 818; AVX2: # %bb.0: 819; AVX2-NEXT: vmovd %edi, %xmm2 820; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 821; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128] 822; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 823; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 824; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1 825; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 826; AVX2-NEXT: retq 827; 828; AVX512F-LABEL: add_v8i32_cast_cond: 829; AVX512F: # %bb.0: 830; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 831; AVX512F-NEXT: kmovw %edi, %k1 832; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 833; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0 834; AVX512F-NEXT: retq 835; 836; AVX512VL-LABEL: add_v8i32_cast_cond: 837; AVX512VL: # %bb.0: 838; AVX512VL-NEXT: kmovw %edi, %k1 839; AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} 840; AVX512VL-NEXT: retq 841 %b = bitcast i8 %pb to <8 x i1> 842 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 843 %r = add <8 x i32> %x, %s 844 ret <8 x i32> %r 845} 846 847define <8 x i64> @add_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) { 848; AVX2-LABEL: add_v8i64_cast_cond: 849; AVX2: # %bb.0: 850; AVX2-NEXT: vmovd %edi, %xmm4 851; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 852; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128] 853; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6 854; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5 855; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3 856; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8] 857; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 858; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4 859; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2 860; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 861; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1 862; AVX2-NEXT: retq 863; 864; AVX512-LABEL: add_v8i64_cast_cond: 865; AVX512: # %bb.0: 866; AVX512-NEXT: kmovw %edi, %k1 867; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} 868; 
AVX512-NEXT: retq 869 %b = bitcast i8 %pb to <8 x i1> 870 %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer 871 %r = add <8 x i64> %x, %s 872 ret <8 x i64> %r 873} 874 875define <4 x i32> @sub_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) { 876; AVX2-LABEL: sub_v4i32: 877; AVX2: # %bb.0: 878; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 879; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 880; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 881; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm0 882; AVX2-NEXT: retq 883; 884; AVX512F-LABEL: sub_v4i32: 885; AVX512F: # %bb.0: 886; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 887; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 888; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 889; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 890; AVX512F-NEXT: vpsubd %xmm0, %xmm1, %xmm0 891; AVX512F-NEXT: vzeroupper 892; AVX512F-NEXT: retq 893; 894; AVX512VL-LABEL: sub_v4i32: 895; AVX512VL: # %bb.0: 896; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 897; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 898; AVX512VL-NEXT: vpsubd %xmm2, %xmm1, %xmm1 {%k1} 899; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 900; AVX512VL-NEXT: retq 901 %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer 902 %r = sub <4 x i32> %x, %s 903 ret <4 x i32> %r 904} 905 906; negative test - sub is not commutative; there is no identity constant for operand 0 907 908define <8 x i32> @sub_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) { 909; AVX2-LABEL: sub_v8i32_commute: 910; AVX2: # %bb.0: 911; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 912; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 913; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 914; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 915; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 916; AVX2-NEXT: retq 917; 918; AVX512F-LABEL: sub_v8i32_commute: 919; AVX512F: # %bb.0: 920; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 921; AVX512F-NEXT: 
vpmovsxwq %xmm0, %zmm0 922; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 923; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 924; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 925; AVX512F-NEXT: vpsubd %ymm1, %ymm0, %ymm0 926; AVX512F-NEXT: retq 927; 928; AVX512VL-LABEL: sub_v8i32_commute: 929; AVX512VL: # %bb.0: 930; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 931; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 932; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 933; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z} 934; AVX512VL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 935; AVX512VL-NEXT: retq 936 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 937 %r = sub <8 x i32> %s, %x 938 ret <8 x i32> %r 939} 940 941define <16 x i32> @sub_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) { 942; AVX2-LABEL: sub_v16i32_swap: 943; AVX2: # %bb.0: 944; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 945; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 946; AVX2-NEXT: vpslld $31, %ymm5, %ymm5 947; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5 948; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4 949; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 950; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 951; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 952; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 953; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0 954; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0 955; AVX2-NEXT: vpsubd %ymm4, %ymm2, %ymm1 956; AVX2-NEXT: retq 957; 958; AVX512-LABEL: sub_v16i32_swap: 959; AVX512: # %bb.0: 960; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 961; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 962; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 963; AVX512-NEXT: vpsubd %zmm2, %zmm1, %zmm0 964; AVX512-NEXT: vmovdqa32 
%zmm1, %zmm0 {%k1} 965; AVX512-NEXT: retq 966 %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y 967 %r = sub <16 x i32> %x, %s 968 ret <16 x i32> %r 969} 970 971; negative test - sub is not commutative; there is no identity constant for operand 0 972 973define <16 x i32> @sub_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) { 974; AVX2-LABEL: sub_v16i32_commute_swap: 975; AVX2: # %bb.0: 976; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 977; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 978; AVX2-NEXT: vpslld $31, %ymm5, %ymm5 979; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5 980; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4 981; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 982; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 983; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 984; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 985; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0 986; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 987; AVX2-NEXT: vpsubd %ymm2, %ymm4, %ymm1 988; AVX2-NEXT: retq 989; 990; AVX512-LABEL: sub_v16i32_commute_swap: 991; AVX512: # %bb.0: 992; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 993; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 994; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1 995; AVX512-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 996; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm0 997; AVX512-NEXT: retq 998 %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y 999 %r = sub <16 x i32> %s, %x 1000 ret <16 x i32> %r 1001} 1002 1003define <8 x i32> @sub_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1004; AVX2-LABEL: sub_v8i32_cast_cond: 1005; AVX2: # %bb.0: 1006; AVX2-NEXT: vmovd %edi, %xmm2 1007; 
AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 1008; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128] 1009; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 1010; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 1011; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1 1012; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0 1013; AVX2-NEXT: retq 1014; 1015; AVX512F-LABEL: sub_v8i32_cast_cond: 1016; AVX512F: # %bb.0: 1017; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1018; AVX512F-NEXT: kmovw %edi, %k1 1019; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 1020; AVX512F-NEXT: vpsubd %ymm1, %ymm0, %ymm0 1021; AVX512F-NEXT: retq 1022; 1023; AVX512VL-LABEL: sub_v8i32_cast_cond: 1024; AVX512VL: # %bb.0: 1025; AVX512VL-NEXT: kmovw %edi, %k1 1026; AVX512VL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} 1027; AVX512VL-NEXT: retq 1028 %b = bitcast i8 %pb to <8 x i1> 1029 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 1030 %r = sub <8 x i32> %x, %s 1031 ret <8 x i32> %r 1032} 1033 1034define <8 x i64> @sub_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) { 1035; AVX2-LABEL: sub_v8i64_cast_cond: 1036; AVX2: # %bb.0: 1037; AVX2-NEXT: vmovd %edi, %xmm4 1038; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 1039; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128] 1040; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6 1041; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5 1042; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3 1043; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8] 1044; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 1045; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4 1046; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2 1047; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0 1048; AVX2-NEXT: vpsubq %ymm3, %ymm1, %ymm1 1049; AVX2-NEXT: retq 1050; 1051; AVX512-LABEL: sub_v8i64_cast_cond: 1052; AVX512: # %bb.0: 1053; AVX512-NEXT: kmovw %edi, %k1 1054; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} 1055; AVX512-NEXT: retq 1056 %b = bitcast i8 %pb to <8 x i1> 1057 %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer 1058 %r = sub <8 x i64> 
%x, %s 1059 ret <8 x i64> %r 1060} 1061 1062define <4 x i32> @mul_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) { 1063; AVX2-LABEL: mul_v4i32: 1064; AVX2: # %bb.0: 1065; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 1066; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] 1067; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0 1068; AVX2-NEXT: vpmulld %xmm0, %xmm1, %xmm0 1069; AVX2-NEXT: retq 1070; 1071; AVX512F-LABEL: mul_v4i32: 1072; AVX512F: # %bb.0: 1073; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1074; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 1075; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1076; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1] 1077; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} 1078; AVX512F-NEXT: vpmulld %xmm0, %xmm1, %xmm0 1079; AVX512F-NEXT: vzeroupper 1080; AVX512F-NEXT: retq 1081; 1082; AVX512VL-LABEL: mul_v4i32: 1083; AVX512VL: # %bb.0: 1084; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 1085; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 1086; AVX512VL-NEXT: vpmulld %xmm2, %xmm1, %xmm1 {%k1} 1087; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 1088; AVX512VL-NEXT: retq 1089 %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1090 %r = mul <4 x i32> %x, %s 1091 ret <4 x i32> %r 1092} 1093 1094define <8 x i32> @mul_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1095; AVX2-LABEL: mul_v8i32_commute: 1096; AVX2: # %bb.0: 1097; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1098; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 1099; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] 1100; AVX2-NEXT: vblendvps %ymm0, %ymm2, %ymm3, %ymm0 1101; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1102; AVX2-NEXT: retq 1103; 1104; AVX512F-LABEL: mul_v8i32_commute: 1105; AVX512F: # %bb.0: 1106; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1107; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 1108; AVX512F-NEXT: vpsllq $63, %zmm0, 
%zmm0 1109; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 1110; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1] 1111; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} 1112; AVX512F-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1113; AVX512F-NEXT: retq 1114; 1115; AVX512VL-LABEL: mul_v8i32_commute: 1116; AVX512VL: # %bb.0: 1117; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 1118; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 1119; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 1120; AVX512VL-NEXT: vpmulld %ymm2, %ymm1, %ymm1 {%k1} 1121; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0 1122; AVX512VL-NEXT: retq 1123 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1124 %r = mul <8 x i32> %s, %x 1125 ret <8 x i32> %r 1126} 1127 1128define <8 x i32> @mul_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1129; AVX2-LABEL: mul_v8i32_cast_cond: 1130; AVX2: # %bb.0: 1131; AVX2-NEXT: vmovd %edi, %xmm2 1132; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 1133; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128] 1134; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 1135; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 1136; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] 1137; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1 1138; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 1139; AVX2-NEXT: retq 1140; 1141; AVX512F-LABEL: mul_v8i32_cast_cond: 1142; AVX512F: # %bb.0: 1143; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1144; AVX512F-NEXT: kmovw %edi, %k1 1145; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] 1146; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} 1147; AVX512F-NEXT: vpmulld %ymm2, %ymm0, %ymm0 1148; AVX512F-NEXT: retq 1149; 1150; AVX512VL-LABEL: mul_v8i32_cast_cond: 1151; AVX512VL: # %bb.0: 1152; AVX512VL-NEXT: kmovw %edi, %k1 1153; AVX512VL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 {%k1} 1154; AVX512VL-NEXT: retq 1155 %b = bitcast i8 %pb to <8 x i1> 1156 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> <i32 1, i32 1, i32 
1, i32 1, i32 1, i32 1, i32 1, i32 1> 1157 %r = mul <8 x i32> %x, %s 1158 ret <8 x i32> %r 1159} 1160 1161define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) { 1162; AVX2-LABEL: mul_v8i64_cast_cond: 1163; AVX2: # %bb.0: 1164; AVX2-NEXT: vmovd %edi, %xmm4 1165; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 1166; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128] 1167; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6 1168; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5 1169; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm6 = [1,1,1,1] 1170; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm6, %ymm3 1171; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8] 1172; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 1173; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4 1174; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm6, %ymm2 1175; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm4 1176; AVX2-NEXT: vpmuludq %ymm2, %ymm4, %ymm4 1177; AVX2-NEXT: vpsrlq $32, %ymm2, %ymm5 1178; AVX2-NEXT: vpmuludq %ymm5, %ymm0, %ymm5 1179; AVX2-NEXT: vpaddq %ymm4, %ymm5, %ymm4 1180; AVX2-NEXT: vpsllq $32, %ymm4, %ymm4 1181; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 1182; AVX2-NEXT: vpaddq %ymm4, %ymm0, %ymm0 1183; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm2 1184; AVX2-NEXT: vpmuludq %ymm3, %ymm2, %ymm2 1185; AVX2-NEXT: vpsrlq $32, %ymm3, %ymm4 1186; AVX2-NEXT: vpmuludq %ymm4, %ymm1, %ymm4 1187; AVX2-NEXT: vpaddq %ymm2, %ymm4, %ymm2 1188; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 1189; AVX2-NEXT: vpmuludq %ymm3, %ymm1, %ymm1 1190; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1191; AVX2-NEXT: retq 1192; 1193; AVX512-LABEL: mul_v8i64_cast_cond: 1194; AVX512: # %bb.0: 1195; AVX512-NEXT: kmovw %edi, %k1 1196; AVX512-NEXT: vpsrlq $32, %zmm1, %zmm2 1197; AVX512-NEXT: vpmuludq %zmm2, %zmm0, %zmm2 1198; AVX512-NEXT: vpsrlq $32, %zmm0, %zmm3 1199; AVX512-NEXT: vpmuludq %zmm1, %zmm3, %zmm3 1200; AVX512-NEXT: vpaddq %zmm3, %zmm2, %zmm2 1201; AVX512-NEXT: vpsllq $32, %zmm2, %zmm2 1202; AVX512-NEXT: vpmuludq %zmm1, %zmm0, %zmm1 1203; AVX512-NEXT: vpaddq %zmm2, %zmm1, %zmm0 
{%k1} 1204; AVX512-NEXT: retq 1205 %b = bitcast i8 %pb to <8 x i1> 1206 %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> 1207 %r = mul <8 x i64> %x, %s 1208 ret <8 x i64> %r 1209} 1210 1211define <4 x i32> @shl_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) { 1212; AVX2-LABEL: shl_v4i32: 1213; AVX2: # %bb.0: 1214; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 1215; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 1216; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 1217; AVX2-NEXT: vpsllvd %xmm0, %xmm1, %xmm0 1218; AVX2-NEXT: retq 1219; 1220; AVX512F-LABEL: shl_v4i32: 1221; AVX512F: # %bb.0: 1222; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1223; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 1224; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1225; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 1226; AVX512F-NEXT: vpsllvd %xmm0, %xmm1, %xmm0 1227; AVX512F-NEXT: vzeroupper 1228; AVX512F-NEXT: retq 1229; 1230; AVX512VL-LABEL: shl_v4i32: 1231; AVX512VL: # %bb.0: 1232; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 1233; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 1234; AVX512VL-NEXT: vpsllvd %xmm2, %xmm1, %xmm1 {%k1} 1235; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 1236; AVX512VL-NEXT: retq 1237 %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer 1238 %r = shl <4 x i32> %x, %s 1239 ret <4 x i32> %r 1240} 1241 1242; negative test - shl is not commutative; there is no identity constant for operand 0 1243 1244define <8 x i32> @shl_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1245; AVX2-LABEL: shl_v8i32_commute: 1246; AVX2: # %bb.0: 1247; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1248; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 1249; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1250; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1251; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1252; AVX2-NEXT: retq 1253; 1254; AVX512F-LABEL: 
shl_v8i32_commute: 1255; AVX512F: # %bb.0: 1256; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1257; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 1258; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 1259; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 1260; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 1261; AVX512F-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1262; AVX512F-NEXT: retq 1263; 1264; AVX512VL-LABEL: shl_v8i32_commute: 1265; AVX512VL: # %bb.0: 1266; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 1267; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0 1268; AVX512VL-NEXT: vptestmd %ymm0, %ymm0, %k1 1269; AVX512VL-NEXT: vmovdqa32 %ymm2, %ymm0 {%k1} {z} 1270; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1271; AVX512VL-NEXT: retq 1272 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 1273 %r = shl <8 x i32> %s, %x 1274 ret <8 x i32> %r 1275} 1276 1277define <16 x i32> @shl_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) { 1278; AVX2-LABEL: shl_v16i32_swap: 1279; AVX2: # %bb.0: 1280; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1281; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 1282; AVX2-NEXT: vpslld $31, %ymm5, %ymm5 1283; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5 1284; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4 1285; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1286; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1287; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 1288; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1289; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0 1290; AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0 1291; AVX2-NEXT: vpsllvd %ymm4, %ymm2, %ymm1 1292; AVX2-NEXT: retq 1293; 1294; AVX512-LABEL: shl_v16i32_swap: 1295; AVX512: # %bb.0: 1296; AVX512-NEXT: vpmovsxbd %xmm0, 
%zmm0 1297; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 1298; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 1299; AVX512-NEXT: vpsllvd %zmm2, %zmm1, %zmm0 1300; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} 1301; AVX512-NEXT: retq 1302 %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y 1303 %r = shl <16 x i32> %x, %s 1304 ret <16 x i32> %r 1305} 1306 1307; negative test - shl is not commutative; there is no identity constant for operand 0 1308 1309define <16 x i32> @shl_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) { 1310; AVX2-LABEL: shl_v16i32_commute_swap: 1311; AVX2: # %bb.0: 1312; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 1313; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero 1314; AVX2-NEXT: vpslld $31, %ymm5, %ymm5 1315; AVX2-NEXT: vpsrad $31, %ymm5, %ymm5 1316; AVX2-NEXT: vpandn %ymm4, %ymm5, %ymm4 1317; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1318; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1319; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 1320; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1321; AVX2-NEXT: vpandn %ymm3, %ymm0, %ymm0 1322; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1323; AVX2-NEXT: vpsllvd %ymm2, %ymm4, %ymm1 1324; AVX2-NEXT: retq 1325; 1326; AVX512-LABEL: shl_v16i32_commute_swap: 1327; AVX512: # %bb.0: 1328; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 1329; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 1330; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k1 1331; AVX512-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 1332; AVX512-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 1333; AVX512-NEXT: retq 1334 %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y 1335 %r = shl <16 x i32> %s, %x 1336 ret <16 x i32> %r 1337} 1338 
; shl_v8i32_cast_cond: the select condition is the i8 argument %pb bitcast to <8 x i1>.
; The false arm of the select is zeroinitializer, so lanes whose mask bit is clear shift %x by 0.
; The AVX512VL checks expect one masked vpsllvd ({%k1}) fed directly by kmovw; the AVX2 and
; AVX512F checks expect the selected shift amount to be built first (vpand, or a zero-masked
; vmovdqa32) and then an unmasked vpsllvd.
1339define <8 x i32> @shl_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1340; AVX2-LABEL: shl_v8i32_cast_cond: 1341; AVX2: # %bb.0: 1342; AVX2-NEXT: vmovd %edi, %xmm2 1343; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 1344; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128] 1345; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 1346; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 1347; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1 1348; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1349; AVX2-NEXT: retq 1350; 1351; AVX512F-LABEL: shl_v8i32_cast_cond: 1352; AVX512F: # %bb.0: 1353; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1354; AVX512F-NEXT: kmovw %edi, %k1 1355; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z} 1356; AVX512F-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 1357; AVX512F-NEXT: retq 1358; 1359; AVX512VL-LABEL: shl_v8i32_cast_cond: 1360; AVX512VL: # %bb.0: 1361; AVX512VL-NEXT: kmovw %edi, %k1 1362; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 {%k1} 1363; AVX512VL-NEXT: retq 1364 %b = bitcast i8 %pb to <8 x i1> 1365 %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer 1366 %r = shl <8 x i32> %x, %s 1367 ret <8 x i32> %r 1368} 1369 1370define <8 x i64> @shl_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) { 1371; AVX2-LABEL: shl_v8i64_cast_cond: 1372; AVX2: # %bb.0: 1373; AVX2-NEXT: vmovd %edi, %xmm4 1374; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 1375; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,32,64,128] 1376; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm6 1377; AVX2-NEXT: vpcmpeqq %ymm5, %ymm6, %ymm5 1378; AVX2-NEXT: vpand %ymm3, %ymm5, %ymm3 1379; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [1,2,4,8] 1380; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 1381; AVX2-NEXT: vpcmpeqq %ymm5, %ymm4, %ymm4 1382; AVX2-NEXT: vpand %ymm2, %ymm4, %ymm2 1383; AVX2-NEXT: vpsllvq %ymm2, %ymm0, %ymm0 1384; AVX2-NEXT: vpsllvq %ymm3, %ymm1, %ymm1 1385; AVX2-NEXT: retq 1386; 1387; AVX512-LABEL: shl_v8i64_cast_cond: 1388; AVX512: # %bb.0: 1389; AVX512-NEXT: kmovw 
%edi, %k1 1390; AVX512-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} 1391; AVX512-NEXT: retq 1392 %b = bitcast i8 %pb to <8 x i1> 1393 %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer 1394 %r = shl <8 x i64> %x, %s 1395 ret <8 x i64> %r 1396} 1397 1398define <4 x i32> @lshr_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) { 1399; AVX2-LABEL: lshr_v4i32: 1400; AVX2: # %bb.0: 1401; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 1402; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 1403; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 1404; AVX2-NEXT: vpsrlvd %xmm0, %xmm1, %xmm0 1405; AVX2-NEXT: retq 1406; 1407; AVX512F-LABEL: lshr_v4i32: 1408; AVX512F: # %bb.0: 1409; AVX512F-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1410; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 1411; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 1412; AVX512F-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} {z} 1413; AVX512F-NEXT: vpsrlvd %xmm0, %xmm1, %xmm0 1414; AVX512F-NEXT: vzeroupper 1415; AVX512F-NEXT: retq 1416; 1417; AVX512VL-LABEL: lshr_v4i32: 1418; AVX512VL: # %bb.0: 1419; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 1420; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 1421; AVX512VL-NEXT: vpsrlvd %xmm2, %xmm1, %xmm1 {%k1} 1422; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 1423; AVX512VL-NEXT: retq 1424 %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer 1425 %r = lshr <4 x i32> %x, %s 1426 ret <4 x i32> %r 1427} 1428 1429; negative test - lshr is not commutative; there is no identity constant for operand 0 1430 1431define <8 x i32> @lshr_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) { 1432; AVX2-LABEL: lshr_v8i32_commute: 1433; AVX2: # %bb.0: 1434; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1435; AVX2-NEXT: vpslld $31, %ymm0, %ymm0 1436; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 1437; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1438; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 1439; AVX2-NEXT: retq 1440; 1441; 
AVX512F-LABEL: lshr_v8i32_commute:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_v8i32_commute:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
  %r = lshr <8 x i32> %s, %x
  ret <8 x i32> %r
}

; lshr whose shift amount (operand 1) is a select with the arms swapped:
; lanes where %b is true use a shift amount of zero, the remaining lanes use %y.
define <16 x i32> @lshr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: lshr_v16i32_swap:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlvd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsrlvd %ymm4, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: lshr_v16i32_swap:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
  %r = lshr <16 x i32> %x, %s
  ret <16 x i32> %r
}

; negative test - lshr is not commutative; there is no identity constant for operand 0

; Here the select result (zero where %b is true, %y otherwise) is the value
; being shifted and %x is the shift amount.
define <16 x i32> @lshr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: lshr_v16i32_commute_swap:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlvd %ymm2, %ymm4, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: lshr_v16i32_commute_swap:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
  %r = lshr <16 x i32> %s, %x
  ret <16 x i32> %r
}

; The select condition is an i8 argument bitcast to <8 x i1>; the lshr shift
; amount is %y in lanes whose condition bit is set and zero in the others.
define <8 x i32> @lshr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
; AVX2-LABEL: lshr_v8i32_cast_cond:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: lshr_v8i32_cast_cond:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_v8i32_cast_cond:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
  %r = lshr <8 x i32> %x, %s
  ret <8 x i32> %r
}

; <8 x i64> lshr with an i8 condition bitcast to <8 x i1>; the shift amount is
; %y in lanes whose condition bit is set and zero in the others.
define <8 x i64> @lshr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: lshr_v8i64_cast_cond:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm4
; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT:    vpsrlvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlvq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: lshr_v8i64_cast_cond:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw %edi, %k1
; AVX512-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
  %r = lshr <8 x i64> %x, %s
  ret <8 x i64> %r
}

; ashr whose shift amount (operand 1) is a select between %y and zero; %x is
; the value being shifted.
define <4 x i32> @ashr_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
; AVX2-LABEL: ashr_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpsravd %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ashr_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsravd %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: ashr_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpsravd %xmm2, %xmm1, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
; AVX512VL-NEXT:    retq
  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
  %r = ashr <4 x i32> %x, %s
  ret <4 x i32> %r
}

; negative test - ashr is not commutative; there is no identity constant for operand 0

; Here the select result (%y where %b is true, zero otherwise) is the value
; being shifted and %x is the shift amount.
define <8 x i32> @ashr_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
; AVX2-LABEL: ashr_v8i32_commute:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ashr_v8i32_commute:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: ashr_v8i32_commute:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
  %r = ashr <8 x i32> %s, %x
  ret <8 x i32> %r
}

; ashr whose shift amount (operand 1) is a select with the arms swapped:
; lanes where %b is true use a shift amount of zero, the remaining lanes use %y.
define <16 x i32> @ashr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: ashr_v16i32_swap:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsravd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    vpsravd %ymm4, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ashr_v16i32_swap:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512-NEXT:    vpsravd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
  %r = ashr <16 x i32> %x, %s
  ret <16 x i32> %r
}

; negative test - ashr is not commutative; there is no identity constant for operand 0

; Here the select result (zero where %b is true, %y otherwise) is the value
; being shifted and %x is the shift amount.
define <16 x i32> @ashr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
; AVX2-LABEL: ashr_v16i32_commute_swap:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsravd %ymm2, %ymm4, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ashr_v16i32_commute_swap:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
; AVX512-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
  %r = ashr <16 x i32> %s, %x
  ret <16 x i32> %r
}

; The select condition is an i8 argument bitcast to <8 x i1>; the ashr shift
; amount is %y in lanes whose condition bit is set and zero in the others.
define <8 x i32> @ashr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
; AVX2-LABEL: ashr_v8i32_cast_cond:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ashr_v8i32_cast_cond:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: ashr_v8i32_cast_cond:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    kmovw %edi, %k1
; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1}
; AVX512VL-NEXT:    retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
  %r = ashr <8 x i32> %x, %s
  ret <8 x i32> %r
}

; <8 x i64> ashr with an i8 condition bitcast to <8 x i1>; the shift amount is
; %y in lanes whose condition bit is set and zero in the others.
define <8 x i64> @ashr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
; AVX2-LABEL: ashr_v8i64_cast_cond:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm4
; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpsrlvq %ymm2, %ymm4, %ymm5
; AVX2-NEXT:    vpsrlvq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm5, %ymm0, %ymm0
; AVX2-NEXT:    vpsubq %ymm5, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlvq %ymm3, %ymm4, %ymm2
; AVX2-NEXT:    vpsrlvq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsubq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ashr_v8i64_cast_cond:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovw %edi, %k1
; AVX512-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1}
; AVX512-NEXT:    retq
  %b = bitcast i8 %pb to <8 x i1>
  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
  %r = ashr <8 x i64> %x, %s
  ret <8 x i64> %r
}