1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2             | FileCheck %s --check-prefixes=AVX2
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f          | FileCheck %s --check-prefixes=AVX512,AVX512F
4; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512VL
5
; fadd whose operand 1 is a select between %y and -0.0 (the fadd identity), so
; lanes with a clear mask bit add -0.0 to %x and lanes with a set bit add %y.
; The avx512vl run expects a single mask-predicated vaddps; the avx2 and
; avx512f runs expect the -0.0 splat to be blended with %y before a plain vaddps.
6define <4 x float> @fadd_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
7; AVX2-LABEL: fadd_v4f32:
8; AVX2:       # %bb.0:
9; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
10; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
11; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
12; AVX2-NEXT:    vaddps %xmm0, %xmm1, %xmm0
13; AVX2-NEXT:    retq
14;
15; AVX512F-LABEL: fadd_v4f32:
16; AVX512F:       # %bb.0:
17; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
18; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
19; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
20; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
21; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
22; AVX512F-NEXT:    vaddps %xmm0, %xmm1, %xmm0
23; AVX512F-NEXT:    vzeroupper
24; AVX512F-NEXT:    retq
25;
26; AVX512VL-LABEL: fadd_v4f32:
27; AVX512VL:       # %bb.0:
28; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
29; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
30; AVX512VL-NEXT:    vaddps %xmm2, %xmm1, %xmm1 {%k1}
31; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
32; AVX512VL-NEXT:    retq
; False arm is the -0.0 splat, so masked-off lanes feed the identity into the add.
33  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>
34  %r = fadd <4 x float> %x, %s
35  ret <4 x float> %r
36}
37
; Same pattern as a select-with-identity fadd, but the select result is operand 0
; of the fadd (%s + %x). The avx512vl run still expects one mask-predicated
; vaddps; the avx2 and avx512f runs expect a blend of the -0.0 splat followed by
; a plain vaddps.
38define <8 x float> @fadd_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
39; AVX2-LABEL: fadd_v8f32_commute:
40; AVX2:       # %bb.0:
41; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
42; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
43; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
44; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm3, %ymm0
45; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
46; AVX2-NEXT:    retq
47;
48; AVX512F-LABEL: fadd_v8f32_commute:
49; AVX512F:       # %bb.0:
50; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
51; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
52; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
53; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
54; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
55; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
56; AVX512F-NEXT:    vaddps %ymm1, %ymm0, %ymm0
57; AVX512F-NEXT:    retq
58;
59; AVX512VL-LABEL: fadd_v8f32_commute:
60; AVX512VL:       # %bb.0:
61; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
62; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
63; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
64; AVX512VL-NEXT:    vaddps %ymm2, %ymm1, %ymm1 {%k1}
65; AVX512VL-NEXT:    vmovaps %ymm1, %ymm0
66; AVX512VL-NEXT:    retq
; The select feeds operand 0 here; fadd is commutative, so -0.0 is still an identity.
67  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>
68  %r = fadd <8 x float> %s, %x
69  ret <8 x float> %r
70}
71
; Select arms are swapped relative to the basic case: the -0.0 identity is the
; TRUE arm, so lanes with a set mask bit add -0.0 to %x and the other lanes add
; %y. Both avx512 runs expect an unmasked vaddps followed by a masked vmovaps
; that writes %x (zmm1) into the lanes whose mask bit is set. The avx2 run
; handles the 16 lanes as two ymm halves and blends the -0.0 splat into %y.
72define <16 x float> @fadd_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
73; AVX2-LABEL: fadd_v16f32_swap:
74; AVX2:       # %bb.0:
75; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
76; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
77; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
78; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
79; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
80; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
81; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
82; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
83; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
84; AVX2-NEXT:    vaddps %ymm3, %ymm1, %ymm0
85; AVX2-NEXT:    vaddps %ymm4, %ymm2, %ymm1
86; AVX2-NEXT:    retq
87;
88; AVX512-LABEL: fadd_v16f32_swap:
89; AVX512:       # %bb.0:
90; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
91; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
92; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
93; AVX512-NEXT:    vaddps %zmm2, %zmm1, %zmm0
94; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
95; AVX512-NEXT:    retq
; Identity splat sits in the true arm, i.e. the mask sense is inverted.
96  %s = select <16 x i1> %b, <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, <16 x float> %y
97  %r = fadd <16 x float> %x, %s
98  ret <16 x float> %r
99}
100
; Combines both variations: the -0.0 identity is the TRUE arm of the select and
; the select result is operand 0 of the fadd (%s + %x). Both avx512 runs expect
; the same unmasked vaddps plus masked vmovaps of %x as the non-commuted swap
; case; the avx2 run blends the -0.0 splat into each ymm half of %y first.
101define <16 x float> @fadd_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
102; AVX2-LABEL: fadd_v16f32_commute_swap:
103; AVX2:       # %bb.0:
104; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
105; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
106; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
107; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
108; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
109; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
110; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
111; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
112; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
113; AVX2-NEXT:    vaddps %ymm1, %ymm3, %ymm0
114; AVX2-NEXT:    vaddps %ymm2, %ymm4, %ymm1
115; AVX2-NEXT:    retq
116;
117; AVX512-LABEL: fadd_v16f32_commute_swap:
118; AVX512:       # %bb.0:
119; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
120; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
121; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
122; AVX512-NEXT:    vaddps %zmm2, %zmm1, %zmm0
123; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
124; AVX512-NEXT:    retq
; Identity splat is the true arm and the select is the first fadd operand.
125  %s = select <16 x i1> %b, <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, <16 x float> %y
126  %r = fadd <16 x float> %s, %x
127  ret <16 x float> %r
128}
129
; fsub whose subtrahend (operand 1) is a select between %y and +0.0
; (zeroinitializer), so masked-off lanes subtract zero from %x.
; The avx512vl run expects a single mask-predicated vsubps. The avx2 run zeroes
; the masked-off lanes of %y with an AND against the sign-extended mask, and the
; avx512f run uses a zero-masking vmovaps, each followed by a plain vsubps.
130define <4 x float> @fsub_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
131; AVX2-LABEL: fsub_v4f32:
132; AVX2:       # %bb.0:
133; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
134; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
135; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
136; AVX2-NEXT:    vsubps %xmm0, %xmm1, %xmm0
137; AVX2-NEXT:    retq
138;
139; AVX512F-LABEL: fsub_v4f32:
140; AVX512F:       # %bb.0:
141; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
142; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
143; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
144; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1} {z}
145; AVX512F-NEXT:    vsubps %xmm0, %xmm1, %xmm0
146; AVX512F-NEXT:    vzeroupper
147; AVX512F-NEXT:    retq
148;
149; AVX512VL-LABEL: fsub_v4f32:
150; AVX512VL:       # %bb.0:
151; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
152; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
153; AVX512VL-NEXT:    vsubps %xmm2, %xmm1, %xmm1 {%k1}
154; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
155; AVX512VL-NEXT:    retq
; +0.0 in the false arm: subtracting it leaves %x as-is in masked-off lanes.
156  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> zeroinitializer
157  %r = fsub <4 x float> %x, %s
158  ret <4 x float> %r
159}
160
161; negative test - fsub is not commutative; there is no identity constant for operand 0
162
; The select (with +0.0 in its false arm) is the minuend, operand 0 of the fsub.
; Zero is not an identity in that position (0 - x is not x), so no run is
; expected to produce a mask-predicated subtract: every run materialises the
; selected value first (AND with the mask, or a zero-masking vmovaps) and then
; issues a plain vsubps.
163define <8 x float> @fsub_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
164; AVX2-LABEL: fsub_v8f32_commute:
165; AVX2:       # %bb.0:
166; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
167; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
168; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
169; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
170; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm0
171; AVX2-NEXT:    retq
172;
173; AVX512F-LABEL: fsub_v8f32_commute:
174; AVX512F:       # %bb.0:
175; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
176; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
177; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
178; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
179; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1} {z}
180; AVX512F-NEXT:    vsubps %ymm1, %ymm0, %ymm0
181; AVX512F-NEXT:    retq
182;
183; AVX512VL-LABEL: fsub_v8f32_commute:
184; AVX512VL:       # %bb.0:
185; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
186; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
187; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
188; AVX512VL-NEXT:    vmovaps %ymm2, %ymm0 {%k1} {z}
189; AVX512VL-NEXT:    vsubps %ymm1, %ymm0, %ymm0
190; AVX512VL-NEXT:    retq
; The select is the minuend here, so masked-off lanes compute 0.0 - %x.
191  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> zeroinitializer
192  %r = fsub <8 x float> %s, %x
193  ret <8 x float> %r
194}
195
; fsub with the +0.0 identity in the TRUE arm of the select feeding the
; subtrahend: lanes with a set mask bit subtract zero, the others subtract %y.
; Both avx512 runs expect an unmasked vsubps followed by a masked vmovaps that
; writes %x (zmm1) into the lanes whose mask bit is set. The avx2 run clears the
; selected lanes of each ymm half of %y with vpandn before the subtract.
196define <16 x float> @fsub_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
197; AVX2-LABEL: fsub_v16f32_swap:
198; AVX2:       # %bb.0:
199; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
200; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
201; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
202; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
203; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
204; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
205; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
206; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
207; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
208; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
209; AVX2-NEXT:    vsubps %ymm0, %ymm1, %ymm0
210; AVX2-NEXT:    vsubps %ymm4, %ymm2, %ymm1
211; AVX2-NEXT:    retq
212;
213; AVX512-LABEL: fsub_v16f32_swap:
214; AVX512:       # %bb.0:
215; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
216; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
217; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
218; AVX512-NEXT:    vsubps %zmm2, %zmm1, %zmm0
219; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
220; AVX512-NEXT:    retq
; Zero is the true arm, so the mask sense is inverted relative to the basic case.
221  %s = select <16 x i1> %b, <16 x float> zeroinitializer, <16 x float> %y
222  %r = fsub <16 x float> %x, %s
223  ret <16 x float> %r
224}
225
226; negative test - fsub is not commutative; there is no identity constant for operand 0
227
; The select (with +0.0 in its TRUE arm) is the minuend, operand 0 of the fsub.
; Zero is not an identity in that position, so no mask-predicated subtract is
; expected. Both avx512 runs build the inverted mask with vptestnmd, form the
; select with a zero-masking vmovaps of %y, and then issue a plain vsubps; the
; avx2 run clears the selected lanes with vpandn before its plain vsubps pair.
228define <16 x float> @fsub_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
229; AVX2-LABEL: fsub_v16f32_commute_swap:
230; AVX2:       # %bb.0:
231; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
232; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
233; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
234; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
235; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
236; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
237; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
238; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
239; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
240; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
241; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm0
242; AVX2-NEXT:    vsubps %ymm2, %ymm4, %ymm1
243; AVX2-NEXT:    retq
244;
245; AVX512-LABEL: fsub_v16f32_commute_swap:
246; AVX512:       # %bb.0:
247; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
248; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
249; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
250; AVX512-NEXT:    vmovaps %zmm2, %zmm0 {%k1} {z}
251; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
252; AVX512-NEXT:    retq
; The select is the minuend; lanes with a set mask bit compute 0.0 - %x.
253  %s = select <16 x i1> %b, <16 x float> zeroinitializer, <16 x float> %y
254  %r = fsub <16 x float> %s, %x
255  ret <16 x float> %r
256}
257
; fmul whose operand 1 is a select between %y and 1.0 (the fmul identity), so
; masked-off lanes multiply %x by 1.0 and the remaining lanes multiply by %y.
; The avx512vl run expects a single mask-predicated vmulps; the avx2 and
; avx512f runs expect the 1.0 splat to be blended with %y before a plain vmulps.
258define <4 x float> @fmul_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
259; AVX2-LABEL: fmul_v4f32:
260; AVX2:       # %bb.0:
261; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
262; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
263; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
264; AVX2-NEXT:    vmulps %xmm0, %xmm1, %xmm0
265; AVX2-NEXT:    retq
266;
267; AVX512F-LABEL: fmul_v4f32:
268; AVX512F:       # %bb.0:
269; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
270; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
271; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
272; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
273; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
274; AVX512F-NEXT:    vmulps %xmm0, %xmm1, %xmm0
275; AVX512F-NEXT:    vzeroupper
276; AVX512F-NEXT:    retq
277;
278; AVX512VL-LABEL: fmul_v4f32:
279; AVX512VL:       # %bb.0:
280; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
281; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
282; AVX512VL-NEXT:    vmulps %xmm2, %xmm1, %xmm1 {%k1}
283; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
284; AVX512VL-NEXT:    retq
; False arm is the 1.0 splat, so masked-off lanes feed the identity into the multiply.
285  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
286  %r = fmul <4 x float> %x, %s
287  ret <4 x float> %r
288}
289
; Select-with-identity fmul where the select result is operand 0 (%s * %x).
; The avx512vl run still expects one mask-predicated vmulps; the avx2 and
; avx512f runs expect a blend of the 1.0 splat followed by a plain vmulps.
290define <8 x float> @fmul_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
291; AVX2-LABEL: fmul_v8f32_commute:
292; AVX2:       # %bb.0:
293; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
294; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
295; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
296; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm3, %ymm0
297; AVX2-NEXT:    vmulps %ymm1, %ymm0, %ymm0
298; AVX2-NEXT:    retq
299;
300; AVX512F-LABEL: fmul_v8f32_commute:
301; AVX512F:       # %bb.0:
302; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
303; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
304; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
305; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
306; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
307; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
308; AVX512F-NEXT:    vmulps %ymm1, %ymm0, %ymm0
309; AVX512F-NEXT:    retq
310;
311; AVX512VL-LABEL: fmul_v8f32_commute:
312; AVX512VL:       # %bb.0:
313; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
314; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
315; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
316; AVX512VL-NEXT:    vmulps %ymm2, %ymm1, %ymm1 {%k1}
317; AVX512VL-NEXT:    vmovaps %ymm1, %ymm0
318; AVX512VL-NEXT:    retq
; The select feeds operand 0 here; fmul is commutative, so 1.0 is still an identity.
319  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
320  %r = fmul <8 x float> %s, %x
321  ret <8 x float> %r
322}
323
; fmul with the 1.0 identity in the TRUE arm of the select: lanes with a set
; mask bit multiply %x by 1.0 and the other lanes multiply by %y.
; Both avx512 runs expect an unmasked vmulps followed by a masked vmovaps that
; writes %x (zmm1) into the lanes whose mask bit is set. The avx2 run handles
; the 16 lanes as two ymm halves and blends the 1.0 splat into %y.
324define <16 x float> @fmul_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
325; AVX2-LABEL: fmul_v16f32_swap:
326; AVX2:       # %bb.0:
327; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
328; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
329; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
330; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
331; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
332; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
333; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
334; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
335; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
336; AVX2-NEXT:    vmulps %ymm3, %ymm1, %ymm0
337; AVX2-NEXT:    vmulps %ymm4, %ymm2, %ymm1
338; AVX2-NEXT:    retq
339;
340; AVX512-LABEL: fmul_v16f32_swap:
341; AVX512:       # %bb.0:
342; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
343; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
344; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
345; AVX512-NEXT:    vmulps %zmm2, %zmm1, %zmm0
346; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
347; AVX512-NEXT:    retq
; Identity splat sits in the true arm, i.e. the mask sense is inverted.
348  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
349  %r = fmul <16 x float> %x, %s
350  ret <16 x float> %r
351}
352
; Combines both variations: the 1.0 identity is the TRUE arm of the select and
; the select result is operand 0 of the fmul (%s * %x). Both avx512 runs expect
; an unmasked vmulps followed by a masked vmovaps of %x (zmm1); the avx2 run
; blends the 1.0 splat into each ymm half of %y before multiplying.
353define <16 x float> @fmul_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
354; AVX2-LABEL: fmul_v16f32_commute_swap:
355; AVX2:       # %bb.0:
356; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
357; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
358; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
359; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
360; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
361; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
362; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
363; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
364; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
365; AVX2-NEXT:    vmulps %ymm1, %ymm3, %ymm0
366; AVX2-NEXT:    vmulps %ymm2, %ymm4, %ymm1
367; AVX2-NEXT:    retq
368;
369; AVX512-LABEL: fmul_v16f32_commute_swap:
370; AVX512:       # %bb.0:
371; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
372; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
373; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
374; AVX512-NEXT:    vmulps %zmm2, %zmm1, %zmm0
375; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
376; AVX512-NEXT:    retq
; Identity splat is the true arm and the select is the first fmul operand.
377  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
378  %r = fmul <16 x float> %s, %x
379  ret <16 x float> %r
380}
381
; fdiv whose divisor (operand 1) is a select between %y and 1.0, so masked-off
; lanes divide %x by 1.0 and the remaining lanes divide by %y.
; The avx512vl run expects a single mask-predicated vdivps; the avx2 and
; avx512f runs expect the 1.0 splat to be blended with %y before a plain vdivps.
382define <4 x float> @fdiv_v4f32(<4 x i1> %b, <4 x float> noundef %x, <4 x float> noundef %y) {
383; AVX2-LABEL: fdiv_v4f32:
384; AVX2:       # %bb.0:
385; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
386; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
387; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
388; AVX2-NEXT:    vdivps %xmm0, %xmm1, %xmm0
389; AVX2-NEXT:    retq
390;
391; AVX512F-LABEL: fdiv_v4f32:
392; AVX512F:       # %bb.0:
393; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
394; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
395; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
396; AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
397; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
398; AVX512F-NEXT:    vdivps %xmm0, %xmm1, %xmm0
399; AVX512F-NEXT:    vzeroupper
400; AVX512F-NEXT:    retq
401;
402; AVX512VL-LABEL: fdiv_v4f32:
403; AVX512VL:       # %bb.0:
404; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
405; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
406; AVX512VL-NEXT:    vdivps %xmm2, %xmm1, %xmm1 {%k1}
407; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
408; AVX512VL-NEXT:    retq
; 1.0 in the false arm: dividing by it leaves %x as-is in masked-off lanes.
409  %s = select <4 x i1> %b, <4 x float> %y, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
410  %r = fdiv <4 x float> %x, %s
411  ret <4 x float> %r
412}
413
; negative test - fdiv is not commutative; there is no identity constant for operand 0
;
; The select (with 1.0 in its false arm) is the dividend, so masked-off lanes
; compute 1.0 / %x rather than passing %x through. Every run is expected to
; materialise the select (blend or masked vmovaps over a 1.0 splat) and then
; issue a plain, unmasked vdivps.
414define <8 x float> @fdiv_v8f32_commute(<8 x i1> %b, <8 x float> noundef %x, <8 x float> noundef %y) {
415; AVX2-LABEL: fdiv_v8f32_commute:
416; AVX2:       # %bb.0:
417; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
418; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
419; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
420; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm3, %ymm0
421; AVX2-NEXT:    vdivps %ymm1, %ymm0, %ymm0
422; AVX2-NEXT:    retq
423;
424; AVX512F-LABEL: fdiv_v8f32_commute:
425; AVX512F:       # %bb.0:
426; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
427; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
428; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
429; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
430; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
431; AVX512F-NEXT:    vmovaps %zmm2, %zmm0 {%k1}
432; AVX512F-NEXT:    vdivps %ymm1, %ymm0, %ymm0
433; AVX512F-NEXT:    retq
434;
435; AVX512VL-LABEL: fdiv_v8f32_commute:
436; AVX512VL:       # %bb.0:
437; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
438; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
439; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
440; AVX512VL-NEXT:    vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
441; AVX512VL-NEXT:    vmovaps %ymm2, %ymm0 {%k1}
442; AVX512VL-NEXT:    vdivps %ymm1, %ymm0, %ymm0
443; AVX512VL-NEXT:    retq
; The select is the dividend here, so masked-off lanes compute 1.0 / %x.
444  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
445  %r = fdiv <8 x float> %s, %x
446  ret <8 x float> %r
447}
448
; fdiv with the 1.0 identity in the TRUE arm of the select feeding the divisor:
; lanes with a set mask bit divide %x by 1.0 and the other lanes divide by %y.
; Both avx512 runs expect an unmasked vdivps followed by a masked vmovaps that
; writes %x (zmm1) into the lanes whose mask bit is set. The avx2 run handles
; the 16 lanes as two ymm halves and blends the 1.0 splat into %y.
449define <16 x float> @fdiv_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
450; AVX2-LABEL: fdiv_v16f32_swap:
451; AVX2:       # %bb.0:
452; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
453; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
454; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
455; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
456; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
457; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
458; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
459; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
460; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
461; AVX2-NEXT:    vdivps %ymm3, %ymm1, %ymm0
462; AVX2-NEXT:    vdivps %ymm4, %ymm2, %ymm1
463; AVX2-NEXT:    retq
464;
465; AVX512-LABEL: fdiv_v16f32_swap:
466; AVX512:       # %bb.0:
467; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
468; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
469; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
470; AVX512-NEXT:    vdivps %zmm2, %zmm1, %zmm0
471; AVX512-NEXT:    vmovaps %zmm1, %zmm0 {%k1}
472; AVX512-NEXT:    retq
; Identity splat sits in the true arm, i.e. the mask sense is inverted.
473  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
474  %r = fdiv <16 x float> %x, %s
475  ret <16 x float> %r
476}
477
; negative test - fdiv is not commutative; there is no identity constant for operand 0
;
; The select (with 1.0 in its TRUE arm) is the dividend, so lanes with a set
; mask bit compute 1.0 / %x. Both avx512 runs expect a mask-predicated
; vbroadcastss from the constant pool into %y's register (zmm2) to form the
; select, followed by a plain, unmasked vdivps; the avx2 run blends a 1.0 splat
; into each ymm half of %y before its plain vdivps pair.
478define <16 x float> @fdiv_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef %x, <16 x float> noundef %y) {
479; AVX2-LABEL: fdiv_v16f32_commute_swap:
480; AVX2:       # %bb.0:
481; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
482; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
483; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
484; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
485; AVX2-NEXT:    vblendvps %ymm5, %ymm6, %ymm3, %ymm3
486; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
487; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
488; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
489; AVX2-NEXT:    vblendvps %ymm0, %ymm6, %ymm4, %ymm4
490; AVX2-NEXT:    vdivps %ymm1, %ymm3, %ymm0
491; AVX2-NEXT:    vdivps %ymm2, %ymm4, %ymm1
492; AVX2-NEXT:    retq
493;
494; AVX512-LABEL: fdiv_v16f32_commute_swap:
495; AVX512:       # %bb.0:
496; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
497; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
498; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
499; AVX512-NEXT:    vbroadcastss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2 {%k1}
500; AVX512-NEXT:    vdivps %zmm1, %zmm2, %zmm0
501; AVX512-NEXT:    retq
; The select is the dividend; lanes with a set mask bit compute 1.0 / %x.
502  %s = select <16 x i1> %b, <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, <16 x float> %y
503  %r = fdiv <16 x float> %s, %x
504  ret <16 x float> %r
505}
506
; Select-with-identity fadd where the <8 x i1> condition is a bitcast of a
; scalar i8 argument instead of a vector argument.
; The avx512vl run expects the byte to be moved straight into a mask register
; (kmovw) and used to predicate a single vaddps. The avx512f run also uses kmovw
; but forms the select with a masked vmovaps over a -0.0 splat first. The avx2
; run broadcasts the byte and tests one bit per lane against [1,2,...,128] to
; build a vector mask for a blend.
507define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
508; AVX2-LABEL: fadd_v8f32_cast_cond:
509; AVX2:       # %bb.0:
510; AVX2-NEXT:    vmovd %edi, %xmm2
511; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
512; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
513; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
514; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
515; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
516; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
517; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
518; AVX2-NEXT:    retq
519;
520; AVX512F-LABEL: fadd_v8f32_cast_cond:
521; AVX512F:       # %bb.0:
522; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
523; AVX512F-NEXT:    kmovw %edi, %k1
524; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
525; AVX512F-NEXT:    vmovaps %zmm1, %zmm2 {%k1}
526; AVX512F-NEXT:    vaddps %ymm2, %ymm0, %ymm0
527; AVX512F-NEXT:    retq
528;
529; AVX512VL-LABEL: fadd_v8f32_cast_cond:
530; AVX512VL:       # %bb.0:
531; AVX512VL-NEXT:    kmovw %edi, %k1
532; AVX512VL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1}
533; AVX512VL-NEXT:    retq
; Reinterpret the 8 bits of %pb as one i1 per lane; false lanes pick -0.0.
534  %b = bitcast i8 %pb to <8 x i1>
535  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>
536  %r = fadd <8 x float> %x, %s
537  ret <8 x float> %r
538}
539
; Double-precision variant of the bitcast-condition fadd: the <8 x i1> mask is
; a bitcast of a scalar i8 and the false arm of the select is a -0.0 splat.
; Both avx512 runs share one expectation: kmovw into a mask register and a
; single mask-predicated vaddpd on zmm registers. The avx2 run processes two
; ymm halves, testing bits [1,2,4,8] for the low half and [16,32,64,128] for
; the high half, and blends a -0.0 splat into %y before each vaddpd.
540define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
541; AVX2-LABEL: fadd_v8f64_cast_cond:
542; AVX2:       # %bb.0:
543; AVX2-NEXT:    vmovd %edi, %xmm4
544; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
545; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
546; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
547; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
548; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
549; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
550; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
551; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
552; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
553; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
554; AVX2-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
555; AVX2-NEXT:    vaddpd %ymm3, %ymm1, %ymm1
556; AVX2-NEXT:    retq
557;
558; AVX512-LABEL: fadd_v8f64_cast_cond:
559; AVX512:       # %bb.0:
560; AVX512-NEXT:    kmovw %edi, %k1
561; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1}
562; AVX512-NEXT:    retq
; Reinterpret the 8 bits of %pb as one i1 per lane; false lanes pick -0.0.
563  %b = bitcast i8 %pb to <8 x i1>
564  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>
565  %r = fadd <8 x double> %x, %s
566  ret <8 x double> %r
567}
568
; Select-with-identity fsub (subtrahend is %y or +0.0) where the <8 x i1>
; condition is a bitcast of a scalar i8 argument.
; The avx512vl run expects kmovw into a mask register and a single
; mask-predicated vsubps. The avx512f run zero-masks %y with vmovaps before a
; plain vsubps. The avx2 run expands the byte into a per-lane vector mask and
; ANDs it with %y before a plain vsubps.
569define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
570; AVX2-LABEL: fsub_v8f32_cast_cond:
571; AVX2:       # %bb.0:
572; AVX2-NEXT:    vmovd %edi, %xmm2
573; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
574; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
575; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
576; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
577; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
578; AVX2-NEXT:    vsubps %ymm1, %ymm0, %ymm0
579; AVX2-NEXT:    retq
580;
581; AVX512F-LABEL: fsub_v8f32_cast_cond:
582; AVX512F:       # %bb.0:
583; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
584; AVX512F-NEXT:    kmovw %edi, %k1
585; AVX512F-NEXT:    vmovaps %zmm1, %zmm1 {%k1} {z}
586; AVX512F-NEXT:    vsubps %ymm1, %ymm0, %ymm0
587; AVX512F-NEXT:    retq
588;
589; AVX512VL-LABEL: fsub_v8f32_cast_cond:
590; AVX512VL:       # %bb.0:
591; AVX512VL-NEXT:    kmovw %edi, %k1
592; AVX512VL-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1}
593; AVX512VL-NEXT:    retq
; Reinterpret the 8 bits of %pb as one i1 per lane; false lanes pick +0.0.
594  %b = bitcast i8 %pb to <8 x i1>
595  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> zeroinitializer
596  %r = fsub <8 x float> %x, %s
597  ret <8 x float> %r
598}
599
; Double-precision variant of the bitcast-condition fsub: the <8 x i1> mask is
; a bitcast of a scalar i8 and the false arm of the select is +0.0.
; Both avx512 runs share one expectation: kmovw into a mask register and a
; single mask-predicated vsubpd on zmm registers. The avx2 run processes two
; ymm halves, testing bits [1,2,4,8] for the low half and [16,32,64,128] for
; the high half, and ANDs each resulting mask with %y before a plain vsubpd.
600define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
601; AVX2-LABEL: fsub_v8f64_cast_cond:
602; AVX2:       # %bb.0:
603; AVX2-NEXT:    vmovd %edi, %xmm4
604; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
605; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
606; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
607; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
608; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
609; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
610; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
611; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
612; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
613; AVX2-NEXT:    vsubpd %ymm2, %ymm0, %ymm0
614; AVX2-NEXT:    vsubpd %ymm3, %ymm1, %ymm1
615; AVX2-NEXT:    retq
616;
617; AVX512-LABEL: fsub_v8f64_cast_cond:
618; AVX512:       # %bb.0:
619; AVX512-NEXT:    kmovw %edi, %k1
620; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0 {%k1}
621; AVX512-NEXT:    retq
; Reinterpret the 8 bits of %pb as one i1 per lane; false lanes pick +0.0.
622  %b = bitcast i8 %pb to <8 x i1>
623  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> zeroinitializer
624  %r = fsub <8 x double> %x, %s
625  ret <8 x double> %r
626}
627
; Select-with-identity fmul (operand 1 is %y or 1.0) where the <8 x i1>
; condition is a bitcast of a scalar i8 argument.
; The avx512vl run expects kmovw into a mask register and a single
; mask-predicated vmulps. The avx512f run forms the select with a masked
; vmovaps over a 1.0 splat first. The avx2 run broadcasts the byte and tests
; one bit per lane against [1,2,...,128] to build a vector mask for a blend.
628define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
629; AVX2-LABEL: fmul_v8f32_cast_cond:
630; AVX2:       # %bb.0:
631; AVX2-NEXT:    vmovd %edi, %xmm2
632; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
633; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
634; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
635; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
636; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
637; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
638; AVX2-NEXT:    vmulps %ymm1, %ymm0, %ymm0
639; AVX2-NEXT:    retq
640;
641; AVX512F-LABEL: fmul_v8f32_cast_cond:
642; AVX512F:       # %bb.0:
643; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
644; AVX512F-NEXT:    kmovw %edi, %k1
645; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
646; AVX512F-NEXT:    vmovaps %zmm1, %zmm2 {%k1}
647; AVX512F-NEXT:    vmulps %ymm2, %ymm0, %ymm0
648; AVX512F-NEXT:    retq
649;
650; AVX512VL-LABEL: fmul_v8f32_cast_cond:
651; AVX512VL:       # %bb.0:
652; AVX512VL-NEXT:    kmovw %edi, %k1
653; AVX512VL-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1}
654; AVX512VL-NEXT:    retq
; Reinterpret the 8 bits of %pb as one i1 per lane; false lanes pick 1.0.
655  %b = bitcast i8 %pb to <8 x i1>
656  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
657  %r = fmul <8 x float> %x, %s
658  ret <8 x float> %r
659}
660
; fmul whose operand 1 is select(%b, %y, 1.0) on <8 x double>; %b is the i8 argument bitcast to <8 x i1>.
; The shared AVX512 checks expect kmovw plus a single merge-masked vmulpd on zmm.
; The AVX2 checks expect the mask rebuilt per 256-bit half, a vblendvpd against a broadcast 1.0
; for each half, and then two unmasked vmulpd.
661define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
662; AVX2-LABEL: fmul_v8f64_cast_cond:
663; AVX2:       # %bb.0:
664; AVX2-NEXT:    vmovd %edi, %xmm4
665; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
666; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
667; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
668; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
669; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
670; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
671; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
672; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
673; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
674; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
675; AVX2-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
676; AVX2-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
677; AVX2-NEXT:    retq
678;
679; AVX512-LABEL: fmul_v8f64_cast_cond:
680; AVX512:       # %bb.0:
681; AVX512-NEXT:    kmovw %edi, %k1
682; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0 {%k1}
683; AVX512-NEXT:    retq
684  %b = bitcast i8 %pb to <8 x i1>
685  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
686  %r = fmul <8 x double> %x, %s
687  ret <8 x double> %r
688}
689
; fdiv whose divisor is select(%b, %y, 1.0); %b is the i8 argument bitcast to <8 x i1>.
; Lanes with a clear mask bit divide by 1.0.
; The AVX512VL checks expect kmovw plus a single merge-masked vdivps on ymm.
; The AVX512F checks expect the select materialized first (broadcast 1.0, masked vmovaps on zmm)
; followed by an unmasked vdivps; the AVX2 checks expect the same shape using vblendvps.
690define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
691; AVX2-LABEL: fdiv_v8f32_cast_cond:
692; AVX2:       # %bb.0:
693; AVX2-NEXT:    vmovd %edi, %xmm2
694; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
695; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
696; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
697; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
698; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
699; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
700; AVX2-NEXT:    vdivps %ymm1, %ymm0, %ymm0
701; AVX2-NEXT:    retq
702;
703; AVX512F-LABEL: fdiv_v8f32_cast_cond:
704; AVX512F:       # %bb.0:
705; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
706; AVX512F-NEXT:    kmovw %edi, %k1
707; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
708; AVX512F-NEXT:    vmovaps %zmm1, %zmm2 {%k1}
709; AVX512F-NEXT:    vdivps %ymm2, %ymm0, %ymm0
710; AVX512F-NEXT:    retq
711;
712; AVX512VL-LABEL: fdiv_v8f32_cast_cond:
713; AVX512VL:       # %bb.0:
714; AVX512VL-NEXT:    kmovw %edi, %k1
715; AVX512VL-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1}
716; AVX512VL-NEXT:    retq
717  %b = bitcast i8 %pb to <8 x i1>
718  %s = select <8 x i1> %b, <8 x float> %y, <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
719  %r = fdiv <8 x float> %x, %s
720  ret <8 x float> %r
721}
722
; fdiv whose divisor is select(%b, %y, 1.0) on <8 x double>; %b is the i8 argument bitcast to <8 x i1>.
; The shared AVX512 checks expect kmovw plus a single merge-masked vdivpd on zmm.
; The AVX2 checks expect the mask rebuilt per 256-bit half, a vblendvpd against a broadcast 1.0
; for each half, and then two unmasked vdivpd.
723define <8 x double> @fdiv_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> noundef %x, <8 x double> noundef %y) {
724; AVX2-LABEL: fdiv_v8f64_cast_cond:
725; AVX2:       # %bb.0:
726; AVX2-NEXT:    vmovd %edi, %xmm4
727; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
728; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
729; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
730; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
731; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
732; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
733; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
734; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
735; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
736; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
737; AVX2-NEXT:    vdivpd %ymm2, %ymm0, %ymm0
738; AVX2-NEXT:    vdivpd %ymm3, %ymm1, %ymm1
739; AVX2-NEXT:    retq
740;
741; AVX512-LABEL: fdiv_v8f64_cast_cond:
742; AVX512:       # %bb.0:
743; AVX512-NEXT:    kmovw %edi, %k1
744; AVX512-NEXT:    vdivpd %zmm1, %zmm0, %zmm0 {%k1}
745; AVX512-NEXT:    retq
746  %b = bitcast i8 %pb to <8 x i1>
747  %s = select <8 x i1> %b, <8 x double> %y, <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
748  %r = fdiv <8 x double> %x, %s
749  ret <8 x double> %r
750}
751
; add whose operand 1 is select(%b, %y, 0); %b is passed directly as a <4 x i1> vector argument.
; Lanes with a false condition add 0.
; The AVX512VL checks expect vpslld + vptestmd to form %k1 and then a merge-masked vpaddd.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpaddd.
; The AVX2 checks expect the condition sign-smeared (vpslld/vpsrad), ANDed in, then vpaddd.
752define <4 x i32> @add_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
753; AVX2-LABEL: add_v4i32:
754; AVX2:       # %bb.0:
755; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
756; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
757; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
758; AVX2-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
759; AVX2-NEXT:    retq
760;
761; AVX512F-LABEL: add_v4i32:
762; AVX512F:       # %bb.0:
763; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
764; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
765; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
766; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
767; AVX512F-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
768; AVX512F-NEXT:    vzeroupper
769; AVX512F-NEXT:    retq
770;
771; AVX512VL-LABEL: add_v4i32:
772; AVX512VL:       # %bb.0:
773; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
774; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
775; AVX512VL-NEXT:    vpaddd %xmm2, %xmm1, %xmm1 {%k1}
776; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
777; AVX512VL-NEXT:    retq
778  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
779  %r = add <4 x i32> %x, %s
780  ret <4 x i32> %r
781}
782
; Commuted form: select(%b, %y, 0) is operand 0 of the add and %x is operand 1.
; The AVX512VL checks still expect a merge-masked vpaddd.
; The AVX512F checks expect the <8 x i1> widened to qword lanes (vpmovsxwq/vpsllq/vptestmq),
; a zero-masked vmovdqa32, and an unmasked vpaddd; the AVX2 checks expect vpand then vpaddd.
783define <8 x i32> @add_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
784; AVX2-LABEL: add_v8i32_commute:
785; AVX2:       # %bb.0:
786; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
787; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
788; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
789; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
790; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
791; AVX2-NEXT:    retq
792;
793; AVX512F-LABEL: add_v8i32_commute:
794; AVX512F:       # %bb.0:
795; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
796; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
797; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
798; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
799; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
800; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
801; AVX512F-NEXT:    retq
802;
803; AVX512VL-LABEL: add_v8i32_commute:
804; AVX512VL:       # %bb.0:
805; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
806; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
807; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
808; AVX512VL-NEXT:    vpaddd %ymm2, %ymm1, %ymm1 {%k1}
809; AVX512VL-NEXT:    vmovdqa %ymm1, %ymm0
810; AVX512VL-NEXT:    retq
811  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
812  %r = add <8 x i32> %s, %x
813  ret <8 x i32> %r
814}
815
; add whose operand 1 is select(%b, %y, 0); %b is the i8 argument bitcast to <8 x i1>.
; The AVX512VL checks expect kmovw plus a single merge-masked vpaddd on ymm.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpaddd.
; The AVX2 checks expect the mask rebuilt with vpbroadcastb/vpand/vpcmpeqd, then vpand and vpaddd.
816define <8 x i32> @add_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
817; AVX2-LABEL: add_v8i32_cast_cond:
818; AVX2:       # %bb.0:
819; AVX2-NEXT:    vmovd %edi, %xmm2
820; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
821; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
822; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
823; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
824; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
825; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
826; AVX2-NEXT:    retq
827;
828; AVX512F-LABEL: add_v8i32_cast_cond:
829; AVX512F:       # %bb.0:
830; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
831; AVX512F-NEXT:    kmovw %edi, %k1
832; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
833; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
834; AVX512F-NEXT:    retq
835;
836; AVX512VL-LABEL: add_v8i32_cast_cond:
837; AVX512VL:       # %bb.0:
838; AVX512VL-NEXT:    kmovw %edi, %k1
839; AVX512VL-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1}
840; AVX512VL-NEXT:    retq
841  %b = bitcast i8 %pb to <8 x i1>
842  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
843  %r = add <8 x i32> %x, %s
844  ret <8 x i32> %r
845}
846
; add whose operand 1 is select(%b, %y, 0) on <8 x i64>; %b is the i8 argument bitcast to <8 x i1>.
; The shared AVX512 checks expect kmovw plus a single merge-masked vpaddq on zmm.
; The AVX2 checks expect the mask rebuilt per 256-bit half (vpbroadcastb, vpand, vpcmpeqq),
; applied with vpand, and then two unmasked vpaddq.
847define <8 x i64> @add_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
848; AVX2-LABEL: add_v8i64_cast_cond:
849; AVX2:       # %bb.0:
850; AVX2-NEXT:    vmovd %edi, %xmm4
851; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
852; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
853; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
854; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
855; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
856; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
857; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
858; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
859; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
860; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
861; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
862; AVX2-NEXT:    retq
863;
864; AVX512-LABEL: add_v8i64_cast_cond:
865; AVX512:       # %bb.0:
866; AVX512-NEXT:    kmovw %edi, %k1
867; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1}
868; AVX512-NEXT:    retq
869  %b = bitcast i8 %pb to <8 x i1>
870  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
871  %r = add <8 x i64> %x, %s
872  ret <8 x i64> %r
873}
874
; sub whose operand 1 (the subtrahend) is select(%b, %y, 0); %b is a <4 x i1> vector argument.
; Lanes with a false condition subtract 0.
; The AVX512VL checks expect vpslld + vptestmd to form %k1 and then a merge-masked vpsubd.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpsubd.
; The AVX2 checks expect the condition sign-smeared (vpslld/vpsrad), ANDed in, then vpsubd.
875define <4 x i32> @sub_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
876; AVX2-LABEL: sub_v4i32:
877; AVX2:       # %bb.0:
878; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
879; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
880; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
881; AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
882; AVX2-NEXT:    retq
883;
884; AVX512F-LABEL: sub_v4i32:
885; AVX512F:       # %bb.0:
886; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
887; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
888; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
889; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
890; AVX512F-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
891; AVX512F-NEXT:    vzeroupper
892; AVX512F-NEXT:    retq
893;
894; AVX512VL-LABEL: sub_v4i32:
895; AVX512VL:       # %bb.0:
896; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
897; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
898; AVX512VL-NEXT:    vpsubd %xmm2, %xmm1, %xmm1 {%k1}
899; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
900; AVX512VL-NEXT:    retq
901  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
902  %r = sub <4 x i32> %x, %s
903  ret <4 x i32> %r
904}
905
906; negative test - sub is not commutative; there is no identity constant for operand 0
907
; Here select(%b, %y, 0) is operand 0 (the minuend) of the sub, so no masked sub is expected.
; Every prefix expects the select materialized first (vpand for AVX2, zero-masked vmovdqa32 for
; AVX512F and AVX512VL) followed by an unmasked vpsubd.
908define <8 x i32> @sub_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
909; AVX2-LABEL: sub_v8i32_commute:
910; AVX2:       # %bb.0:
911; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
912; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
913; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
914; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
915; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
916; AVX2-NEXT:    retq
917;
918; AVX512F-LABEL: sub_v8i32_commute:
919; AVX512F:       # %bb.0:
920; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
921; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
922; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
923; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
924; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
925; AVX512F-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
926; AVX512F-NEXT:    retq
927;
928; AVX512VL-LABEL: sub_v8i32_commute:
929; AVX512VL:       # %bb.0:
930; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
931; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
932; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
933; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
934; AVX512VL-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
935; AVX512VL-NEXT:    retq
936  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
937  %r = sub <8 x i32> %x, %s
938  ret <8 x i32> %r
939}
940
; Swapped select arms: true lanes pick 0 and false lanes pick %y, and the select is operand 1 of the sub.
; The shared AVX512 checks expect an unmasked vpsubd into zmm0 followed by a masked vmovdqa32
; that writes zmm1 back over the lanes selected by %k1.
; The AVX2 checks expect vpandn (inverted condition) per 256-bit half and then two vpsubd.
941define <16 x i32> @sub_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
942; AVX2-LABEL: sub_v16i32_swap:
943; AVX2:       # %bb.0:
944; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
945; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
946; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
947; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
948; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
949; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
950; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
951; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
952; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
953; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
954; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
955; AVX2-NEXT:    vpsubd %ymm4, %ymm2, %ymm1
956; AVX2-NEXT:    retq
957;
958; AVX512-LABEL: sub_v16i32_swap:
959; AVX512:       # %bb.0:
960; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
961; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
962; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
963; AVX512-NEXT:    vpsubd %zmm2, %zmm1, %zmm0
964; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
965; AVX512-NEXT:    retq
966  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
967  %r = sub <16 x i32> %x, %s
968  ret <16 x i32> %r
969}
970
971; negative test - sub is not commutative; there is no identity constant for operand 0
972
; Swapped select arms (true lanes pick 0, false lanes pick %y) with the select as operand 0 of the sub.
; The shared AVX512 checks expect the inverted mask from vptestnmd, a zero-masked vmovdqa32 of zmm2,
; and an unmasked vpsubd; no masked sub is expected.
; The AVX2 checks expect vpandn per 256-bit half followed by two vpsubd.
973define <16 x i32> @sub_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
974; AVX2-LABEL: sub_v16i32_commute_swap:
975; AVX2:       # %bb.0:
976; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
977; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
978; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
979; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
980; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
981; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
982; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
983; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
984; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
985; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
986; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
987; AVX2-NEXT:    vpsubd %ymm2, %ymm4, %ymm1
988; AVX2-NEXT:    retq
989;
990; AVX512-LABEL: sub_v16i32_commute_swap:
991; AVX512:       # %bb.0:
992; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
993; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
994; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
995; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
996; AVX512-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
997; AVX512-NEXT:    retq
998  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
999  %r = sub <16 x i32> %s, %x
1000  ret <16 x i32> %r
1001}
1002
; sub whose operand 1 is select(%b, %y, 0); %b is the i8 argument bitcast to <8 x i1>.
; The AVX512VL checks expect kmovw plus a single merge-masked vpsubd on ymm.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpsubd.
; The AVX2 checks expect the mask rebuilt with vpbroadcastb/vpand/vpcmpeqd, then vpand and vpsubd.
1003define <8 x i32> @sub_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1004; AVX2-LABEL: sub_v8i32_cast_cond:
1005; AVX2:       # %bb.0:
1006; AVX2-NEXT:    vmovd %edi, %xmm2
1007; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
1008; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
1009; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
1010; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
1011; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
1012; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
1013; AVX2-NEXT:    retq
1014;
1015; AVX512F-LABEL: sub_v8i32_cast_cond:
1016; AVX512F:       # %bb.0:
1017; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1018; AVX512F-NEXT:    kmovw %edi, %k1
1019; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1020; AVX512F-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
1021; AVX512F-NEXT:    retq
1022;
1023; AVX512VL-LABEL: sub_v8i32_cast_cond:
1024; AVX512VL:       # %bb.0:
1025; AVX512VL-NEXT:    kmovw %edi, %k1
1026; AVX512VL-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1}
1027; AVX512VL-NEXT:    retq
1028  %b = bitcast i8 %pb to <8 x i1>
1029  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1030  %r = sub <8 x i32> %x, %s
1031  ret <8 x i32> %r
1032}
1033
; sub whose operand 1 is select(%b, %y, 0) on <8 x i64>; %b is the i8 argument bitcast to <8 x i1>.
; The shared AVX512 checks expect kmovw plus a single merge-masked vpsubq on zmm.
; The AVX2 checks expect the mask rebuilt per 256-bit half (vpbroadcastb, vpand, vpcmpeqq),
; applied with vpand, and then two unmasked vpsubq.
1034define <8 x i64> @sub_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
1035; AVX2-LABEL: sub_v8i64_cast_cond:
1036; AVX2:       # %bb.0:
1037; AVX2-NEXT:    vmovd %edi, %xmm4
1038; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
1039; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
1040; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
1041; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
1042; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
1043; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
1044; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
1045; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
1046; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
1047; AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
1048; AVX2-NEXT:    vpsubq %ymm3, %ymm1, %ymm1
1049; AVX2-NEXT:    retq
1050;
1051; AVX512-LABEL: sub_v8i64_cast_cond:
1052; AVX512:       # %bb.0:
1053; AVX512-NEXT:    kmovw %edi, %k1
1054; AVX512-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1}
1055; AVX512-NEXT:    retq
1056  %b = bitcast i8 %pb to <8 x i1>
1057  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
1058  %r = sub <8 x i64> %x, %s
1059  ret <8 x i64> %r
1060}
1061
; mul whose operand 1 is select(%b, %y, 1); %b is a <4 x i1> vector argument.
; Lanes with a false condition multiply by 1.
; The AVX512VL checks expect vpslld + vptestmd to form %k1 and then a merge-masked vpmulld.
; The AVX512F checks expect a broadcast of 1, a masked vmovdqa32 on zmm, and an unmasked vpmulld.
; The AVX2 checks expect a vblendvps against a broadcast of 1 followed by vpmulld.
1062define <4 x i32> @mul_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
1063; AVX2-LABEL: mul_v4i32:
1064; AVX2:       # %bb.0:
1065; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1066; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm3 = [1,1,1,1]
1067; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
1068; AVX2-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
1069; AVX2-NEXT:    retq
1070;
1071; AVX512F-LABEL: mul_v4i32:
1072; AVX512F:       # %bb.0:
1073; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
1074; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
1075; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1076; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [1,1,1,1]
1077; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1}
1078; AVX512F-NEXT:    vpmulld %xmm0, %xmm1, %xmm0
1079; AVX512F-NEXT:    vzeroupper
1080; AVX512F-NEXT:    retq
1081;
1082; AVX512VL-LABEL: mul_v4i32:
1083; AVX512VL:       # %bb.0:
1084; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
1085; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1086; AVX512VL-NEXT:    vpmulld %xmm2, %xmm1, %xmm1 {%k1}
1087; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
1088; AVX512VL-NEXT:    retq
1089  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1090  %r = mul <4 x i32> %x, %s
1091  ret <4 x i32> %r
1092}
1093
; Commuted form: select(%b, %y, 1) is operand 0 of the mul and %x is operand 1.
; The AVX512VL checks still expect a merge-masked vpmulld.
; The AVX512F checks expect the <8 x i1> widened to qword lanes (vpmovsxwq/vpsllq/vptestmq),
; a masked vmovdqa32 over a broadcast of 1, and an unmasked vpmulld.
; The AVX2 checks expect a vblendvps against a broadcast of 1 followed by vpmulld.
1094define <8 x i32> @mul_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1095; AVX2-LABEL: mul_v8i32_commute:
1096; AVX2:       # %bb.0:
1097; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1098; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1099; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
1100; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm3, %ymm0
1101; AVX2-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
1102; AVX2-NEXT:    retq
1103;
1104; AVX512F-LABEL: mul_v8i32_commute:
1105; AVX512F:       # %bb.0:
1106; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1107; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
1108; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
1109; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
1110; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [1,1,1,1,1,1,1,1]
1111; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1}
1112; AVX512F-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
1113; AVX512F-NEXT:    retq
1114;
1115; AVX512VL-LABEL: mul_v8i32_commute:
1116; AVX512VL:       # %bb.0:
1117; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
1118; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
1119; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1120; AVX512VL-NEXT:    vpmulld %ymm2, %ymm1, %ymm1 {%k1}
1121; AVX512VL-NEXT:    vmovdqa %ymm1, %ymm0
1122; AVX512VL-NEXT:    retq
1123  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1124  %r = mul <8 x i32> %s, %x
1125  ret <8 x i32> %r
1126}
1127
; mul whose operand 1 is select(%b, %y, 1); %b is the i8 argument bitcast to <8 x i1>.
; The AVX512VL checks expect kmovw plus a single merge-masked vpmulld on ymm.
; The AVX512F checks expect a broadcast of 1, a masked vmovdqa32 on zmm, and an unmasked vpmulld.
; The AVX2 checks expect the mask rebuilt with vpbroadcastb/vpand/vpcmpeqd, a vblendvps
; against a broadcast of 1, and then vpmulld.
1128define <8 x i32> @mul_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1129; AVX2-LABEL: mul_v8i32_cast_cond:
1130; AVX2:       # %bb.0:
1131; AVX2-NEXT:    vmovd %edi, %xmm2
1132; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
1133; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
1134; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
1135; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
1136; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1]
1137; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm3, %ymm1
1138; AVX2-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
1139; AVX2-NEXT:    retq
1140;
1141; AVX512F-LABEL: mul_v8i32_cast_cond:
1142; AVX512F:       # %bb.0:
1143; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1144; AVX512F-NEXT:    kmovw %edi, %k1
1145; AVX512F-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
1146; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1}
1147; AVX512F-NEXT:    vpmulld %ymm2, %ymm0, %ymm0
1148; AVX512F-NEXT:    retq
1149;
1150; AVX512VL-LABEL: mul_v8i32_cast_cond:
1151; AVX512VL:       # %bb.0:
1152; AVX512VL-NEXT:    kmovw %edi, %k1
1153; AVX512VL-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 {%k1}
1154; AVX512VL-NEXT:    retq
1155  %b = bitcast i8 %pb to <8 x i1>
1156  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1157  %r = mul <8 x i32> %x, %s
1158  ret <8 x i32> %r
1159}
1160
; mul whose operand 1 is select(%b, %y, 1) on <8 x i64>; %b is the i8 argument bitcast to <8 x i1>.
; Neither RUN line enables a single-instruction 64-bit vector multiply, so every prefix expects the
; product built from vpsrlq/vpmuludq/vpsllq/vpaddq.
; The shared AVX512 checks expect the mask applied only on the final vpaddq, which merges into zmm0.
; The AVX2 checks expect each half of %y blended with a broadcast of 1 (vblendvpd) before the
; expanded multiply runs on each 256-bit half.
1161define <8 x i64> @mul_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
1162; AVX2-LABEL: mul_v8i64_cast_cond:
1163; AVX2:       # %bb.0:
1164; AVX2-NEXT:    vmovd %edi, %xmm4
1165; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
1166; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
1167; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
1168; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
1169; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm6 = [1,1,1,1]
1170; AVX2-NEXT:    vblendvpd %ymm5, %ymm3, %ymm6, %ymm3
1171; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
1172; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
1173; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
1174; AVX2-NEXT:    vblendvpd %ymm4, %ymm2, %ymm6, %ymm2
1175; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm4
1176; AVX2-NEXT:    vpmuludq %ymm2, %ymm4, %ymm4
1177; AVX2-NEXT:    vpsrlq $32, %ymm2, %ymm5
1178; AVX2-NEXT:    vpmuludq %ymm5, %ymm0, %ymm5
1179; AVX2-NEXT:    vpaddq %ymm4, %ymm5, %ymm4
1180; AVX2-NEXT:    vpsllq $32, %ymm4, %ymm4
1181; AVX2-NEXT:    vpmuludq %ymm2, %ymm0, %ymm0
1182; AVX2-NEXT:    vpaddq %ymm4, %ymm0, %ymm0
1183; AVX2-NEXT:    vpsrlq $32, %ymm1, %ymm2
1184; AVX2-NEXT:    vpmuludq %ymm3, %ymm2, %ymm2
1185; AVX2-NEXT:    vpsrlq $32, %ymm3, %ymm4
1186; AVX2-NEXT:    vpmuludq %ymm4, %ymm1, %ymm4
1187; AVX2-NEXT:    vpaddq %ymm2, %ymm4, %ymm2
1188; AVX2-NEXT:    vpsllq $32, %ymm2, %ymm2
1189; AVX2-NEXT:    vpmuludq %ymm3, %ymm1, %ymm1
1190; AVX2-NEXT:    vpaddq %ymm2, %ymm1, %ymm1
1191; AVX2-NEXT:    retq
1192;
1193; AVX512-LABEL: mul_v8i64_cast_cond:
1194; AVX512:       # %bb.0:
1195; AVX512-NEXT:    kmovw %edi, %k1
1196; AVX512-NEXT:    vpsrlq $32, %zmm1, %zmm2
1197; AVX512-NEXT:    vpmuludq %zmm2, %zmm0, %zmm2
1198; AVX512-NEXT:    vpsrlq $32, %zmm0, %zmm3
1199; AVX512-NEXT:    vpmuludq %zmm1, %zmm3, %zmm3
1200; AVX512-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
1201; AVX512-NEXT:    vpsllq $32, %zmm2, %zmm2
1202; AVX512-NEXT:    vpmuludq %zmm1, %zmm0, %zmm1
1203; AVX512-NEXT:    vpaddq %zmm2, %zmm1, %zmm0 {%k1}
1204; AVX512-NEXT:    retq
1205  %b = bitcast i8 %pb to <8 x i1>
1206  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
1207  %r = mul <8 x i64> %x, %s
1208  ret <8 x i64> %r
1209}
1210
; shl whose shift amount (operand 1) is select(%b, %y, 0); %b is a <4 x i1> vector argument.
; Lanes with a false condition shift by 0.
; The AVX512VL checks expect vpslld + vptestmd to form %k1 and then a merge-masked vpsllvd.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpsllvd.
; The AVX2 checks expect the condition sign-smeared (vpslld/vpsrad), ANDed in, then vpsllvd.
1211define <4 x i32> @shl_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
1212; AVX2-LABEL: shl_v4i32:
1213; AVX2:       # %bb.0:
1214; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1215; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1216; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
1217; AVX2-NEXT:    vpsllvd %xmm0, %xmm1, %xmm0
1218; AVX2-NEXT:    retq
1219;
1220; AVX512F-LABEL: shl_v4i32:
1221; AVX512F:       # %bb.0:
1222; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
1223; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
1224; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1225; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1226; AVX512F-NEXT:    vpsllvd %xmm0, %xmm1, %xmm0
1227; AVX512F-NEXT:    vzeroupper
1228; AVX512F-NEXT:    retq
1229;
1230; AVX512VL-LABEL: shl_v4i32:
1231; AVX512VL:       # %bb.0:
1232; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
1233; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1234; AVX512VL-NEXT:    vpsllvd %xmm2, %xmm1, %xmm1 {%k1}
1235; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
1236; AVX512VL-NEXT:    retq
1237  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
1238  %r = shl <4 x i32> %x, %s
1239  ret <4 x i32> %r
1240}
1241
1242; negative test - shl is not commutative; there is no identity constant for operand 0
1243
; Here select(%b, %y, 0) is operand 0 (the value being shifted), so no masked shift is expected.
; Every prefix expects the select materialized first (vpand for AVX2, zero-masked vmovdqa32 for
; AVX512F and AVX512VL) followed by an unmasked vpsllvd.
1244define <8 x i32> @shl_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1245; AVX2-LABEL: shl_v8i32_commute:
1246; AVX2:       # %bb.0:
1247; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1248; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1249; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1250; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
1251; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1252; AVX2-NEXT:    retq
1253;
1254; AVX512F-LABEL: shl_v8i32_commute:
1255; AVX512F:       # %bb.0:
1256; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1257; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
1258; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
1259; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
1260; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1261; AVX512F-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1262; AVX512F-NEXT:    retq
1263;
1264; AVX512VL-LABEL: shl_v8i32_commute:
1265; AVX512VL:       # %bb.0:
1266; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
1267; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
1268; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1269; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
1270; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1271; AVX512VL-NEXT:    retq
1272  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1273  %r = shl <8 x i32> %s, %x
1274  ret <8 x i32> %r
1275}
1276
; Swapped select arms: true lanes pick 0 and false lanes pick %y, and the select is the shift amount.
; The shared AVX512 checks expect an unmasked vpsllvd into zmm0 followed by a masked vmovdqa32
; that writes zmm1 back over the lanes selected by %k1.
; The AVX2 checks expect vpandn (inverted condition) per 256-bit half and then two vpsllvd.
1277define <16 x i32> @shl_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1278; AVX2-LABEL: shl_v16i32_swap:
1279; AVX2:       # %bb.0:
1280; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1281; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1282; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1283; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1284; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1285; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1286; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1287; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1288; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1289; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1290; AVX2-NEXT:    vpsllvd %ymm0, %ymm1, %ymm0
1291; AVX2-NEXT:    vpsllvd %ymm4, %ymm2, %ymm1
1292; AVX2-NEXT:    retq
1293;
1294; AVX512-LABEL: shl_v16i32_swap:
1295; AVX512:       # %bb.0:
1296; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1297; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1298; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
1299; AVX512-NEXT:    vpsllvd %zmm2, %zmm1, %zmm0
1300; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
1301; AVX512-NEXT:    retq
1302  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1303  %r = shl <16 x i32> %x, %s
1304  ret <16 x i32> %r
1305}
1306
1307; negative test - shl is not commutative; there is no identity constant for operand 0
1308
; Swapped select arms (true lanes pick 0, false lanes pick %y) with the select as the shifted value.
; The shared AVX512 checks expect the inverted mask from vptestnmd, a zero-masked vmovdqa32 of zmm2,
; and an unmasked vpsllvd; no masked shift is expected.
; The AVX2 checks expect vpandn per 256-bit half followed by two vpsllvd.
1309define <16 x i32> @shl_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1310; AVX2-LABEL: shl_v16i32_commute_swap:
1311; AVX2:       # %bb.0:
1312; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1313; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1314; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1315; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1316; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1317; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1318; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1319; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1320; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1321; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1322; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1323; AVX2-NEXT:    vpsllvd %ymm2, %ymm4, %ymm1
1324; AVX2-NEXT:    retq
1325;
1326; AVX512-LABEL: shl_v16i32_commute_swap:
1327; AVX512:       # %bb.0:
1328; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1329; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1330; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1331; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1332; AVX512-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
1333; AVX512-NEXT:    retq
1334  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1335  %r = shl <16 x i32> %s, %x
1336  ret <16 x i32> %r
1337}
1338
; shl whose shift amount is select(%b, %y, 0); %b is the i8 argument bitcast to <8 x i1>.
; The AVX512VL checks expect kmovw plus a single merge-masked vpsllvd on ymm.
; The AVX512F checks expect a zero-masked vmovdqa32 on zmm followed by an unmasked vpsllvd.
; The AVX2 checks expect the mask rebuilt with vpbroadcastb/vpand/vpcmpeqd, then vpand and vpsllvd.
1339define <8 x i32> @shl_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1340; AVX2-LABEL: shl_v8i32_cast_cond:
1341; AVX2:       # %bb.0:
1342; AVX2-NEXT:    vmovd %edi, %xmm2
1343; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
1344; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
1345; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
1346; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
1347; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
1348; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1349; AVX2-NEXT:    retq
1350;
1351; AVX512F-LABEL: shl_v8i32_cast_cond:
1352; AVX512F:       # %bb.0:
1353; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1354; AVX512F-NEXT:    kmovw %edi, %k1
1355; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1356; AVX512F-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
1357; AVX512F-NEXT:    retq
1358;
1359; AVX512VL-LABEL: shl_v8i32_cast_cond:
1360; AVX512VL:       # %bb.0:
1361; AVX512VL-NEXT:    kmovw %edi, %k1
1362; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1}
1363; AVX512VL-NEXT:    retq
1364  %b = bitcast i8 %pb to <8 x i1>
1365  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1366  %r = shl <8 x i32> %x, %s
1367  ret <8 x i32> %r
1368}
1369
; <8 x i64> variant: the i8 argument is bitcast to the <8 x i1> select
; condition and the shift amount is select(%b, %y, 0). The AVX2 checks work on
; two ymm halves, testing the broadcast byte against [1,2,4,8] and
; [16,32,64,128]; the shared AVX512 checks expect one merge-masked vpsllvq.
1370define <8 x i64> @shl_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
1371; AVX2-LABEL: shl_v8i64_cast_cond:
1372; AVX2:       # %bb.0:
1373; AVX2-NEXT:    vmovd %edi, %xmm4
1374; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
1375; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
1376; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
1377; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
1378; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
1379; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
1380; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
1381; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
1382; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
1383; AVX2-NEXT:    vpsllvq %ymm2, %ymm0, %ymm0
1384; AVX2-NEXT:    vpsllvq %ymm3, %ymm1, %ymm1
1385; AVX2-NEXT:    retq
1386;
1387; AVX512-LABEL: shl_v8i64_cast_cond:
1388; AVX512:       # %bb.0:
1389; AVX512-NEXT:    kmovw %edi, %k1
1390; AVX512-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1}
1391; AVX512-NEXT:    retq
1392  %b = bitcast i8 %pb to <8 x i1>
1393  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
1394  %r = shl <8 x i64> %x, %s
1395  ret <8 x i64> %r
1396}
1397
; Base lshr case: the shift amount is select(%b, %y, 0). With AVX512VL the
; checks expect the select folded into a merge-masked vpsrlvd. With AVX512F
; only, the mask test and a zero-masked move are done on zmm registers before
; an unmasked xmm shift. AVX2 ANDs a sign-extended mask with the amount.
1398define <4 x i32> @lshr_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
1399; AVX2-LABEL: lshr_v4i32:
1400; AVX2:       # %bb.0:
1401; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1402; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1403; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
1404; AVX2-NEXT:    vpsrlvd %xmm0, %xmm1, %xmm0
1405; AVX2-NEXT:    retq
1406;
1407; AVX512F-LABEL: lshr_v4i32:
1408; AVX512F:       # %bb.0:
1409; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
1410; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
1411; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1412; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1413; AVX512F-NEXT:    vpsrlvd %xmm0, %xmm1, %xmm0
1414; AVX512F-NEXT:    vzeroupper
1415; AVX512F-NEXT:    retq
1416;
1417; AVX512VL-LABEL: lshr_v4i32:
1418; AVX512VL:       # %bb.0:
1419; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
1420; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1421; AVX512VL-NEXT:    vpsrlvd %xmm2, %xmm1, %xmm1 {%k1}
1422; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
1423; AVX512VL-NEXT:    retq
1424  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
1425  %r = lshr <4 x i32> %x, %s
1426  ret <4 x i32> %r
1427}
1428
1429; negative test - lshr is not commutative; there is no identity constant for operand 0
1430
; Here the select result is operand 0 of the lshr (the value being shifted),
; and %x is the amount. All three check sets keep a separate masking step
; (vpand, or a zero-masked vmovdqa32) ahead of an unmasked vpsrlvd.
1431define <8 x i32> @lshr_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1432; AVX2-LABEL: lshr_v8i32_commute:
1433; AVX2:       # %bb.0:
1434; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1435; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1436; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1437; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
1438; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1439; AVX2-NEXT:    retq
1440;
1441; AVX512F-LABEL: lshr_v8i32_commute:
1442; AVX512F:       # %bb.0:
1443; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1444; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
1445; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
1446; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
1447; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1448; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1449; AVX512F-NEXT:    retq
1450;
1451; AVX512VL-LABEL: lshr_v8i32_commute:
1452; AVX512VL:       # %bb.0:
1453; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
1454; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
1455; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1456; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
1457; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1458; AVX512VL-NEXT:    retq
1459  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1460  %r = lshr <8 x i32> %s, %x
1461  ret <8 x i32> %r
1462}
1463
; Select arms are swapped: the shift amount is zero where %b is true and %y
; elsewhere. The AVX512 checks expect an unmasked vpsrlvd followed by a
; merge-masked vmovdqa32 for the lanes whose mask bit is set. The AVX2 checks
; use vpandn (inverted mask) on two ymm halves.
1464define <16 x i32> @lshr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1465; AVX2-LABEL: lshr_v16i32_swap:
1466; AVX2:       # %bb.0:
1467; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1468; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1469; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1470; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1471; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1472; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1473; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1474; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1475; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1476; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1477; AVX2-NEXT:    vpsrlvd %ymm0, %ymm1, %ymm0
1478; AVX2-NEXT:    vpsrlvd %ymm4, %ymm2, %ymm1
1479; AVX2-NEXT:    retq
1480;
1481; AVX512-LABEL: lshr_v16i32_swap:
1482; AVX512:       # %bb.0:
1483; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1484; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1485; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
1486; AVX512-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm0
1487; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
1488; AVX512-NEXT:    retq
1489  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1490  %r = lshr <16 x i32> %x, %s
1491  ret <16 x i32> %r
1492}
1493
1494; negative test - lshr is not commutative; there is no identity constant for operand 0
1495
; Combines both variations: the select arms are swapped (zero where %b is
; true) and the select result is operand 0 of the lshr. The AVX512 checks
; expect an inverted mask test (vptestnmd), a zero-masked vmovdqa32, and then
; an unmasked vpsrlvd.
1496define <16 x i32> @lshr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1497; AVX2-LABEL: lshr_v16i32_commute_swap:
1498; AVX2:       # %bb.0:
1499; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1500; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1501; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1502; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1503; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1504; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1505; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1506; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1507; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1508; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1509; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1510; AVX2-NEXT:    vpsrlvd %ymm2, %ymm4, %ymm1
1511; AVX2-NEXT:    retq
1512;
1513; AVX512-LABEL: lshr_v16i32_commute_swap:
1514; AVX512:       # %bb.0:
1515; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1516; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1517; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1518; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1519; AVX512-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
1520; AVX512-NEXT:    retq
1521  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1522  %r = lshr <16 x i32> %s, %x
1523  ret <16 x i32> %r
1524}
1525
; The select condition is the i8 argument bitcast to <8 x i1>. The shift amount
; is select(%b, %y, 0). With AVX512VL the checks expect kmovw plus one
; merge-masked vpsrlvd; with AVX512F only, a zero-masked zmm move precedes an
; unmasked shift. AVX2 rebuilds a lane mask from the broadcast byte.
1526define <8 x i32> @lshr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1527; AVX2-LABEL: lshr_v8i32_cast_cond:
1528; AVX2:       # %bb.0:
1529; AVX2-NEXT:    vmovd %edi, %xmm2
1530; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
1531; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
1532; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
1533; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
1534; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
1535; AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1536; AVX2-NEXT:    retq
1537;
1538; AVX512F-LABEL: lshr_v8i32_cast_cond:
1539; AVX512F:       # %bb.0:
1540; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1541; AVX512F-NEXT:    kmovw %edi, %k1
1542; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1543; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
1544; AVX512F-NEXT:    retq
1545;
1546; AVX512VL-LABEL: lshr_v8i32_cast_cond:
1547; AVX512VL:       # %bb.0:
1548; AVX512VL-NEXT:    kmovw %edi, %k1
1549; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1}
1550; AVX512VL-NEXT:    retq
1551  %b = bitcast i8 %pb to <8 x i1>
1552  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1553  %r = lshr <8 x i32> %x, %s
1554  ret <8 x i32> %r
1555}
1556
; <8 x i64> variant: the i8 argument is bitcast to the <8 x i1> select
; condition and the shift amount is select(%b, %y, 0). The AVX2 checks work on
; two ymm halves, testing the broadcast byte against [1,2,4,8] and
; [16,32,64,128]; the shared AVX512 checks expect one merge-masked vpsrlvq.
1557define <8 x i64> @lshr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
1558; AVX2-LABEL: lshr_v8i64_cast_cond:
1559; AVX2:       # %bb.0:
1560; AVX2-NEXT:    vmovd %edi, %xmm4
1561; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
1562; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
1563; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
1564; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
1565; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
1566; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
1567; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
1568; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
1569; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
1570; AVX2-NEXT:    vpsrlvq %ymm2, %ymm0, %ymm0
1571; AVX2-NEXT:    vpsrlvq %ymm3, %ymm1, %ymm1
1572; AVX2-NEXT:    retq
1573;
1574; AVX512-LABEL: lshr_v8i64_cast_cond:
1575; AVX512:       # %bb.0:
1576; AVX512-NEXT:    kmovw %edi, %k1
1577; AVX512-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1}
1578; AVX512-NEXT:    retq
1579  %b = bitcast i8 %pb to <8 x i1>
1580  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
1581  %r = lshr <8 x i64> %x, %s
1582  ret <8 x i64> %r
1583}
1584
; Base ashr case: the shift amount is select(%b, %y, 0). With AVX512VL the
; checks expect the select folded into a merge-masked vpsravd. With AVX512F
; only, the mask test and a zero-masked move are done on zmm registers before
; an unmasked xmm shift. AVX2 ANDs a sign-extended mask with the amount.
1585define <4 x i32> @ashr_v4i32(<4 x i1> %b, <4 x i32> noundef %x, <4 x i32> noundef %y) {
1586; AVX2-LABEL: ashr_v4i32:
1587; AVX2:       # %bb.0:
1588; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1589; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1590; AVX2-NEXT:    vpand %xmm2, %xmm0, %xmm0
1591; AVX2-NEXT:    vpsravd %xmm0, %xmm1, %xmm0
1592; AVX2-NEXT:    retq
1593;
1594; AVX512F-LABEL: ashr_v4i32:
1595; AVX512F:       # %bb.0:
1596; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
1597; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
1598; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1599; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1600; AVX512F-NEXT:    vpsravd %xmm0, %xmm1, %xmm0
1601; AVX512F-NEXT:    vzeroupper
1602; AVX512F-NEXT:    retq
1603;
1604; AVX512VL-LABEL: ashr_v4i32:
1605; AVX512VL:       # %bb.0:
1606; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
1607; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
1608; AVX512VL-NEXT:    vpsravd %xmm2, %xmm1, %xmm1 {%k1}
1609; AVX512VL-NEXT:    vmovdqa %xmm1, %xmm0
1610; AVX512VL-NEXT:    retq
1611  %s = select <4 x i1> %b, <4 x i32> %y, <4 x i32> zeroinitializer
1612  %r = ashr <4 x i32> %x, %s
1613  ret <4 x i32> %r
1614}
1615
1616; negative test - ashr is not commutative; there is no identity constant for operand 0
1617
; Here the select result is operand 0 of the ashr (the value being shifted),
; and %x is the amount. All three check sets keep a separate masking step
; (vpand, or a zero-masked vmovdqa32) ahead of an unmasked vpsravd.
1618define <8 x i32> @ashr_v8i32_commute(<8 x i1> %b, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1619; AVX2-LABEL: ashr_v8i32_commute:
1620; AVX2:       # %bb.0:
1621; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1622; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1623; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1624; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
1625; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1626; AVX2-NEXT:    retq
1627;
1628; AVX512F-LABEL: ashr_v8i32_commute:
1629; AVX512F:       # %bb.0:
1630; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1631; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
1632; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
1633; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
1634; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1635; AVX512F-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1636; AVX512F-NEXT:    retq
1637;
1638; AVX512VL-LABEL: ashr_v8i32_commute:
1639; AVX512VL:       # %bb.0:
1640; AVX512VL-NEXT:    vpmovsxwd %xmm0, %ymm0
1641; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
1642; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
1643; AVX512VL-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
1644; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1645; AVX512VL-NEXT:    retq
1646  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1647  %r = ashr <8 x i32> %s, %x
1648  ret <8 x i32> %r
1649}
1650
; Select arms are swapped: the shift amount is zero where %b is true and %y
; elsewhere. The AVX512 checks expect an unmasked vpsravd followed by a
; merge-masked vmovdqa32 for the lanes whose mask bit is set. The AVX2 checks
; use vpandn (inverted mask) on two ymm halves.
1651define <16 x i32> @ashr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1652; AVX2-LABEL: ashr_v16i32_swap:
1653; AVX2:       # %bb.0:
1654; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1655; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1656; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1657; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1658; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1659; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1660; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1661; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1662; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1663; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1664; AVX2-NEXT:    vpsravd %ymm0, %ymm1, %ymm0
1665; AVX2-NEXT:    vpsravd %ymm4, %ymm2, %ymm1
1666; AVX2-NEXT:    retq
1667;
1668; AVX512-LABEL: ashr_v16i32_swap:
1669; AVX512:       # %bb.0:
1670; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1671; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1672; AVX512-NEXT:    vptestmd %zmm0, %zmm0, %k1
1673; AVX512-NEXT:    vpsravd %zmm2, %zmm1, %zmm0
1674; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
1675; AVX512-NEXT:    retq
1676  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1677  %r = ashr <16 x i32> %x, %s
1678  ret <16 x i32> %r
1679}
1680
1681; negative test - ashr is not commutative; there is no identity constant for operand 0
1682
; Combines both variations: the select arms are swapped (zero where %b is
; true) and the select result is operand 0 of the ashr. The AVX512 checks
; expect an inverted mask test (vptestnmd), a zero-masked vmovdqa32, and then
; an unmasked vpsravd.
1683define <16 x i32> @ashr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i32> noundef %y) {
1684; AVX2-LABEL: ashr_v16i32_commute_swap:
1685; AVX2:       # %bb.0:
1686; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1687; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
1688; AVX2-NEXT:    vpslld $31, %ymm5, %ymm5
1689; AVX2-NEXT:    vpsrad $31, %ymm5, %ymm5
1690; AVX2-NEXT:    vpandn %ymm4, %ymm5, %ymm4
1691; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1692; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1693; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
1694; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
1695; AVX2-NEXT:    vpandn %ymm3, %ymm0, %ymm0
1696; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1697; AVX2-NEXT:    vpsravd %ymm2, %ymm4, %ymm1
1698; AVX2-NEXT:    retq
1699;
1700; AVX512-LABEL: ashr_v16i32_commute_swap:
1701; AVX512:       # %bb.0:
1702; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
1703; AVX512-NEXT:    vpslld $31, %zmm0, %zmm0
1704; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k1
1705; AVX512-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1} {z}
1706; AVX512-NEXT:    vpsravd %zmm1, %zmm0, %zmm0
1707; AVX512-NEXT:    retq
1708  %s = select <16 x i1> %b, <16 x i32> zeroinitializer, <16 x i32> %y
1709  %r = ashr <16 x i32> %s, %x
1710  ret <16 x i32> %r
1711}
1712
; The select condition is the i8 argument bitcast to <8 x i1>. The shift amount
; is select(%b, %y, 0). With AVX512VL the checks expect kmovw plus one
; merge-masked vpsravd; with AVX512F only, a zero-masked zmm move precedes an
; unmasked shift. AVX2 rebuilds a lane mask from the broadcast byte.
1713define <8 x i32> @ashr_v8i32_cast_cond(i8 noundef zeroext %pb, <8 x i32> noundef %x, <8 x i32> noundef %y) {
1714; AVX2-LABEL: ashr_v8i32_cast_cond:
1715; AVX2:       # %bb.0:
1716; AVX2-NEXT:    vmovd %edi, %xmm2
1717; AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
1718; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
1719; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
1720; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm2, %ymm2
1721; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
1722; AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1723; AVX2-NEXT:    retq
1724;
1725; AVX512F-LABEL: ashr_v8i32_cast_cond:
1726; AVX512F:       # %bb.0:
1727; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1728; AVX512F-NEXT:    kmovw %edi, %k1
1729; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
1730; AVX512F-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
1731; AVX512F-NEXT:    retq
1732;
1733; AVX512VL-LABEL: ashr_v8i32_cast_cond:
1734; AVX512VL:       # %bb.0:
1735; AVX512VL-NEXT:    kmovw %edi, %k1
1736; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1}
1737; AVX512VL-NEXT:    retq
1738  %b = bitcast i8 %pb to <8 x i1>
1739  %s = select <8 x i1> %b, <8 x i32> %y, <8 x i32> zeroinitializer
1740  %r = ashr <8 x i32> %x, %s
1741  ret <8 x i32> %r
1742}
1743
; <8 x i64> variant: the i8 argument is bitcast to the <8 x i1> select
; condition and the shift amount is select(%b, %y, 0). The expected AVX2
; sequence contains no 64-bit variable arithmetic shift: per ymm half it uses
; vpsrlvq on both the data and a broadcast sign-bit constant, then vpxor and
; vpsubq. The shared AVX512 checks expect one merge-masked vpsravq.
1744define <8 x i64> @ashr_v8i64_cast_cond(i8 noundef zeroext %pb, <8 x i64> noundef %x, <8 x i64> noundef %y) {
1745; AVX2-LABEL: ashr_v8i64_cast_cond:
1746; AVX2:       # %bb.0:
1747; AVX2-NEXT:    vmovd %edi, %xmm4
1748; AVX2-NEXT:    vpbroadcastb %xmm4, %ymm4
1749; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [16,32,64,128]
1750; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm6
1751; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm6, %ymm5
1752; AVX2-NEXT:    vpand %ymm3, %ymm5, %ymm3
1753; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,2,4,8]
1754; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
1755; AVX2-NEXT:    vpcmpeqq %ymm5, %ymm4, %ymm4
1756; AVX2-NEXT:    vpand %ymm2, %ymm4, %ymm2
1757; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1758; AVX2-NEXT:    vpsrlvq %ymm2, %ymm4, %ymm5
1759; AVX2-NEXT:    vpsrlvq %ymm2, %ymm0, %ymm0
1760; AVX2-NEXT:    vpxor %ymm5, %ymm0, %ymm0
1761; AVX2-NEXT:    vpsubq %ymm5, %ymm0, %ymm0
1762; AVX2-NEXT:    vpsrlvq %ymm3, %ymm4, %ymm2
1763; AVX2-NEXT:    vpsrlvq %ymm3, %ymm1, %ymm1
1764; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm1
1765; AVX2-NEXT:    vpsubq %ymm2, %ymm1, %ymm1
1766; AVX2-NEXT:    retq
1767;
1768; AVX512-LABEL: ashr_v8i64_cast_cond:
1769; AVX512:       # %bb.0:
1770; AVX512-NEXT:    kmovw %edi, %k1
1771; AVX512-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1}
1772; AVX512-NEXT:    retq
1773  %b = bitcast i8 %pb to <8 x i1>
1774  %s = select <8 x i1> %b, <8 x i64> %y, <8 x i64> zeroinitializer
1775  %r = ashr <8 x i64> %x, %s
1776  ret <8 x i64> %r
1777}
1778