1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx       | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX1
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2      | FileCheck %s --check-prefixes=CHECK,AVX,AVX12,AVX2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f   | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512VL
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=xop       | FileCheck %s --check-prefixes=CHECK,XOP
7
8; The condition vector for BLENDV* only cares about the sign bit of each element.
9; So in these tests, if we generate BLENDV*, we should be able to remove the redundant cmp op.
10
11; Test 128-bit vectors for all legal element types.
12
; Per-lane select: take %x where the i8 lane of %mask is negative (icmp slt 0),
; otherwise %y. The shared CHECK lines expect every RUN configuration to emit
; only vpblendvb keyed directly on %mask, i.e. no separate compare.
13define <16 x i8> @signbit_sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) {
14; CHECK-LABEL: signbit_sel_v16i8:
15; CHECK:       # %bb.0:
16; CHECK-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
17; CHECK-NEXT:    retq
18  %tr = icmp slt <16 x i8> %mask, zeroinitializer
19  %z = select <16 x i1> %tr, <16 x i8> %x, <16 x i8> %y
20  ret <16 x i8> %z
21}
22
23; 16-bit elements are blended with the byte-granular vpblendvb, so the compare is expected to remain in this case.
24
; Same sign-bit select with i16 lanes. The expected output keeps an explicit
; compare against a zeroed register (vpcmpgtw, or vpcomltw on the XOP run)
; feeding vpblendvb, so the compare is not removed for this element type.
25define <8 x i16> @signbit_sel_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) {
26; AVX-LABEL: signbit_sel_v8i16:
27; AVX:       # %bb.0:
28; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
29; AVX-NEXT:    vpcmpgtw %xmm2, %xmm3, %xmm2
30; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
31; AVX-NEXT:    retq
32;
33; XOP-LABEL: signbit_sel_v8i16:
34; XOP:       # %bb.0:
35; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
36; XOP-NEXT:    vpcomltw %xmm3, %xmm2, %xmm2
37; XOP-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
38; XOP-NEXT:    retq
39  %tr = icmp slt <8 x i16> %mask, zeroinitializer
40  %z = select <8 x i1> %tr, <8 x i16> %x, <8 x i16> %y
41  ret <8 x i16> %z
42}
43
; Sign-bit select with i32 lanes. The AVX1, AVX2 and XOP runs expect a bare
; vblendvps keyed on %mask. The AVX512F run widens the operands to zmm and
; compares against zero into %k1 (vpcmpgtd) before a masked vpblendmd; the
; AVX512VL run expects the same sequence at xmm width.
44define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
45; AVX12-LABEL: signbit_sel_v4i32:
46; AVX12:       # %bb.0:
47; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
48; AVX12-NEXT:    retq
49;
50; AVX512F-LABEL: signbit_sel_v4i32:
51; AVX512F:       # %bb.0:
52; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
53; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
54; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
55; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
56; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
57; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
58; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
59; AVX512F-NEXT:    vzeroupper
60; AVX512F-NEXT:    retq
61;
62; AVX512VL-LABEL: signbit_sel_v4i32:
63; AVX512VL:       # %bb.0:
64; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
65; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
66; AVX512VL-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
67; AVX512VL-NEXT:    retq
68;
69; XOP-LABEL: signbit_sel_v4i32:
70; XOP:       # %bb.0:
71; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
72; XOP-NEXT:    retq
73  %tr = icmp slt <4 x i32> %mask, zeroinitializer
74  %z = select <4 x i1> %tr, <4 x i32> %x, <4 x i32> %y
75  ret <4 x i32> %z
76}
77
; Sign-bit select with i64 lanes. The AVX1, AVX2 and XOP runs expect a bare
; vblendvpd keyed on %mask. Both AVX512 runs expect a compare against zero
; into %k1 (vpcmpgtq) followed by a masked vpblendmq; the AVX512F run does so
; on zmm-widened operands.
78define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) {
79; AVX12-LABEL: signbit_sel_v2i64:
80; AVX12:       # %bb.0:
81; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
82; AVX12-NEXT:    retq
83;
84; AVX512F-LABEL: signbit_sel_v2i64:
85; AVX512F:       # %bb.0:
86; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
87; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
88; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
89; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
90; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
91; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
92; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
93; AVX512F-NEXT:    vzeroupper
94; AVX512F-NEXT:    retq
95;
96; AVX512VL-LABEL: signbit_sel_v2i64:
97; AVX512VL:       # %bb.0:
98; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
99; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
100; AVX512VL-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
101; AVX512VL-NEXT:    retq
102;
103; XOP-LABEL: signbit_sel_v2i64:
104; XOP:       # %bb.0:
105; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
106; XOP-NEXT:    retq
107  %tr = icmp slt <2 x i64> %mask, zeroinitializer
108  %z = select <2 x i1> %tr, <2 x i64> %x, <2 x i64> %y
109  ret <2 x i64> %z
110}
111
; Float data selected by the sign of an i32 mask. The AVX1, AVX2 and XOP runs
; expect a bare vblendvps. Both AVX512 runs expect an integer compare against
; zero into %k1 (vpcmpgtd) followed by a masked vblendmps.
112define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) {
113; AVX12-LABEL: signbit_sel_v4f32:
114; AVX12:       # %bb.0:
115; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
116; AVX12-NEXT:    retq
117;
118; AVX512F-LABEL: signbit_sel_v4f32:
119; AVX512F:       # %bb.0:
120; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
121; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
122; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
123; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
124; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
125; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
126; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
127; AVX512F-NEXT:    vzeroupper
128; AVX512F-NEXT:    retq
129;
130; AVX512VL-LABEL: signbit_sel_v4f32:
131; AVX512VL:       # %bb.0:
132; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
133; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
134; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
135; AVX512VL-NEXT:    retq
136;
137; XOP-LABEL: signbit_sel_v4f32:
138; XOP:       # %bb.0:
139; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
140; XOP-NEXT:    retq
141  %tr = icmp slt <4 x i32> %mask, zeroinitializer
142  %z = select <4 x i1> %tr, <4 x float> %x, <4 x float> %y
143  ret <4 x float> %z
144}
145
; Double data selected by the sign of an i64 mask. The AVX1, AVX2 and XOP runs
; expect a bare vblendvpd. Both AVX512 runs expect an integer compare against
; zero into %k1 (vpcmpgtq) followed by a masked vblendmpd.
146define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) {
147; AVX12-LABEL: signbit_sel_v2f64:
148; AVX12:       # %bb.0:
149; AVX12-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
150; AVX12-NEXT:    retq
151;
152; AVX512F-LABEL: signbit_sel_v2f64:
153; AVX512F:       # %bb.0:
154; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
155; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
156; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
157; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
158; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
159; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
160; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
161; AVX512F-NEXT:    vzeroupper
162; AVX512F-NEXT:    retq
163;
164; AVX512VL-LABEL: signbit_sel_v2f64:
165; AVX512VL:       # %bb.0:
166; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
167; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
168; AVX512VL-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
169; AVX512VL-NEXT:    retq
170;
171; XOP-LABEL: signbit_sel_v2f64:
172; XOP:       # %bb.0:
173; XOP-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
174; XOP-NEXT:    retq
175  %tr = icmp slt <2 x i64> %mask, zeroinitializer
176  %z = select <2 x i1> %tr, <2 x double> %x, <2 x double> %y
177  ret <2 x double> %z
178}
179
180; Test 256-bit vectors to see differences between AVX1 and AVX2.
181
; 256-bit sign-bit select with i8 lanes. The AVX1 and XOP runs expect the mask
; to be split into two 128-bit halves, each compared against zero, then
; recombined; AVX1 blends with vandnps/vandps/vorps and XOP with vpcmov.
; The AVX2 and AVX512 runs expect only a ymm vpblendvb with no compare.
182define <32 x i8> @signbit_sel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %mask) {
183; AVX1-LABEL: signbit_sel_v32i8:
184; AVX1:       # %bb.0:
185; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
186; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
187; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
188; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
189; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
190; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
191; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
192; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
193; AVX1-NEXT:    retq
194;
195; AVX2-LABEL: signbit_sel_v32i8:
196; AVX2:       # %bb.0:
197; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
198; AVX2-NEXT:    retq
199;
200; AVX512-LABEL: signbit_sel_v32i8:
201; AVX512:       # %bb.0:
202; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
203; AVX512-NEXT:    retq
204;
205; XOP-LABEL: signbit_sel_v32i8:
206; XOP:       # %bb.0:
207; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
208; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
209; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
210; XOP-NEXT:    vpcomltb %xmm4, %xmm2, %xmm2
211; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
212; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
213; XOP-NEXT:    retq
214  %tr = icmp slt <32 x i8> %mask, zeroinitializer
215  %z = select <32 x i1> %tr, <32 x i8> %x, <32 x i8> %y
216  ret <32 x i8> %z
217}
218
219; As with the 128-bit case, 16-bit elements are blended with the byte-granular vpblendvb (or and/andn/or, or vpcmov), so the compare is expected to remain.
220
; 256-bit sign-bit select with i16 lanes. The AVX1 and XOP runs expect two
; 128-bit word compares against zero, recombined and blended with
; vandnps/vandps/vorps (AVX1) or vpcmov (XOP). The AVX2 and AVX512 runs expect
; a ymm vpcmpgtw against zero to remain in front of vpblendvb.
221define <16 x i16> @signbit_sel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %mask) {
222; AVX1-LABEL: signbit_sel_v16i16:
223; AVX1:       # %bb.0:
224; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
225; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
226; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm4, %xmm3
227; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm4, %xmm2
228; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
229; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
230; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
231; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
232; AVX1-NEXT:    retq
233;
234; AVX2-LABEL: signbit_sel_v16i16:
235; AVX2:       # %bb.0:
236; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
237; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
238; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
239; AVX2-NEXT:    retq
240;
241; AVX512-LABEL: signbit_sel_v16i16:
242; AVX512:       # %bb.0:
243; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
244; AVX512-NEXT:    vpcmpgtw %ymm2, %ymm3, %ymm2
245; AVX512-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
246; AVX512-NEXT:    retq
247;
248; XOP-LABEL: signbit_sel_v16i16:
249; XOP:       # %bb.0:
250; XOP-NEXT:    vextractf128 $1, %ymm2, %xmm3
251; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
252; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
253; XOP-NEXT:    vpcomltw %xmm4, %xmm2, %xmm2
254; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
255; XOP-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
256; XOP-NEXT:    retq
257  %tr = icmp slt <16 x i16> %mask, zeroinitializer
258  %z = select <16 x i1> %tr, <16 x i16> %x, <16 x i16> %y
259  ret <16 x i16> %z
260}
261
; 256-bit sign-bit select with i32 lanes. The AVX1, AVX2 and XOP runs expect a
; bare ymm vblendvps. Both AVX512 runs expect vpcmpgtd against zero into %k1
; followed by a masked vpblendmd; the AVX512F run widens the operands to zmm.
262define <8 x i32> @signbit_sel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) {
263; AVX12-LABEL: signbit_sel_v8i32:
264; AVX12:       # %bb.0:
265; AVX12-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
266; AVX12-NEXT:    retq
267;
268; AVX512F-LABEL: signbit_sel_v8i32:
269; AVX512F:       # %bb.0:
270; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
271; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
272; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
273; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
274; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
275; AVX512F-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
276; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
277; AVX512F-NEXT:    retq
278;
279; AVX512VL-LABEL: signbit_sel_v8i32:
280; AVX512VL:       # %bb.0:
281; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
282; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
283; AVX512VL-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
284; AVX512VL-NEXT:    retq
285;
286; XOP-LABEL: signbit_sel_v8i32:
287; XOP:       # %bb.0:
288; XOP-NEXT:    vblendvps %ymm2, %ymm0, %ymm1, %ymm0
289; XOP-NEXT:    retq
290  %tr = icmp slt <8 x i32> %mask, zeroinitializer
291  %z = select <8 x i1> %tr, <8 x i32> %x, <8 x i32> %y
292  ret <8 x i32> %z
293}
294
; 256-bit sign-bit select with i64 lanes. The AVX1, AVX2 and XOP runs expect a
; bare ymm vblendvpd. Both AVX512 runs expect vpcmpgtq against zero into %k1
; followed by a masked vpblendmq; the AVX512F run widens the operands to zmm.
295define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) {
296; AVX12-LABEL: signbit_sel_v4i64:
297; AVX12:       # %bb.0:
298; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
299; AVX12-NEXT:    retq
300;
301; AVX512F-LABEL: signbit_sel_v4i64:
302; AVX512F:       # %bb.0:
303; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
304; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
305; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
306; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
307; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
308; AVX512F-NEXT:    vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
309; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
310; AVX512F-NEXT:    retq
311;
312; AVX512VL-LABEL: signbit_sel_v4i64:
313; AVX512VL:       # %bb.0:
314; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
315; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
316; AVX512VL-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
317; AVX512VL-NEXT:    retq
318;
319; XOP-LABEL: signbit_sel_v4i64:
320; XOP:       # %bb.0:
321; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
322; XOP-NEXT:    retq
323  %tr = icmp slt <4 x i64> %mask, zeroinitializer
324  %z = select <4 x i1> %tr, <4 x i64> %x, <4 x i64> %y
325  ret <4 x i64> %z
326}
327
; 256-bit double data selected by the sign of an i64 mask. The AVX1, AVX2 and
; XOP runs expect a bare ymm vblendvpd. Both AVX512 runs expect vpcmpgtq
; against zero into %k1 followed by a masked vblendmpd.
328define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) {
329; AVX12-LABEL: signbit_sel_v4f64:
330; AVX12:       # %bb.0:
331; AVX12-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
332; AVX12-NEXT:    retq
333;
334; AVX512F-LABEL: signbit_sel_v4f64:
335; AVX512F:       # %bb.0:
336; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
337; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
338; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
339; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
340; AVX512F-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
341; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
342; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
343; AVX512F-NEXT:    retq
344;
345; AVX512VL-LABEL: signbit_sel_v4f64:
346; AVX512VL:       # %bb.0:
347; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
348; AVX512VL-NEXT:    vpcmpgtq %ymm2, %ymm3, %k1
349; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
350; AVX512VL-NEXT:    retq
351;
352; XOP-LABEL: signbit_sel_v4f64:
353; XOP:       # %bb.0:
354; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
355; XOP-NEXT:    retq
356  %tr = icmp slt <4 x i64> %mask, zeroinitializer
357  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
358  ret <4 x double> %z
359}
360
361; Try a condition with a different type than the select operands.
362
; The mask is <4 x i32> while the selected data is <4 x double>, so the mask
; lanes are narrower than the data lanes. The AVX1 and XOP runs expect each
; half of the mask to be sign-extended with vpmovsxdq and reassembled into a
; ymm before vblendvpd; the AVX2 run expects a single vpmovsxdq to ymm. Both
; AVX512 runs expect a dword compare against zero into %k1 and a masked
; vblendmpd.
363define <4 x double> @signbit_sel_v4f64_small_mask(<4 x double> %x, <4 x double> %y, <4 x i32> %mask) {
364; AVX1-LABEL: signbit_sel_v4f64_small_mask:
365; AVX1:       # %bb.0:
366; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm3
367; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
368; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
369; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
370; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
371; AVX1-NEXT:    retq
372;
373; AVX2-LABEL: signbit_sel_v4f64_small_mask:
374; AVX2:       # %bb.0:
375; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
376; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
377; AVX2-NEXT:    retq
378;
379; AVX512F-LABEL: signbit_sel_v4f64_small_mask:
380; AVX512F:       # %bb.0:
381; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
382; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
383; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
384; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
385; AVX512F-NEXT:    vpcmpgtd %zmm2, %zmm3, %k1
386; AVX512F-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
387; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
388; AVX512F-NEXT:    retq
389;
390; AVX512VL-LABEL: signbit_sel_v4f64_small_mask:
391; AVX512VL:       # %bb.0:
392; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
393; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
394; AVX512VL-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
395; AVX512VL-NEXT:    retq
396;
397; XOP-LABEL: signbit_sel_v4f64_small_mask:
398; XOP:       # %bb.0:
399; XOP-NEXT:    vpmovsxdq %xmm2, %xmm3
400; XOP-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
401; XOP-NEXT:    vpmovsxdq %xmm2, %xmm2
402; XOP-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
403; XOP-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
404; XOP-NEXT:    retq
405  %tr = icmp slt <4 x i32> %mask, zeroinitializer
406  %z = select <4 x i1> %tr, <4 x double> %x, <4 x double> %y
407  ret <4 x double> %z
408}
409
410; Try a 512-bit vector to make sure AVX-512 is handled as expected.
411
; 512-bit sign-bit select. The AVX1, AVX2 and XOP runs expect the operation to
; be done as two ymm vblendvpd instructions with no compare. Both AVX512 runs
; share one expectation, namely a zmm vpcmpgtq against zero into %k1 followed
; by a masked vblendmpd.
412define <8 x double> @signbit_sel_v8f64(<8 x double> %x, <8 x double> %y, <8 x i64> %mask) {
413; AVX12-LABEL: signbit_sel_v8f64:
414; AVX12:       # %bb.0:
415; AVX12-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
416; AVX12-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
417; AVX12-NEXT:    retq
418;
419; AVX512-LABEL: signbit_sel_v8f64:
420; AVX512:       # %bb.0:
421; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
422; AVX512-NEXT:    vpcmpgtq %zmm2, %zmm3, %k1
423; AVX512-NEXT:    vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
424; AVX512-NEXT:    retq
425;
426; XOP-LABEL: signbit_sel_v8f64:
427; XOP:       # %bb.0:
428; XOP-NEXT:    vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
429; XOP-NEXT:    vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
430; XOP-NEXT:    retq
431  %tr = icmp slt <8 x i64> %mask, zeroinitializer
432  %z = select <8 x i1> %tr, <8 x double> %x, <8 x double> %y
433  ret <8 x double> %z
434}
435
436; If we have a floating-point compare:
437; (1) Don't die.
438; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded.
439
; The select condition is a floating-point compare (fcmp olt %x, 0.0) rather
; than an integer sign test. The body compares %x itself, and the %mask
; parameter is not referenced. Every run is expected to keep the compare
; (vcmpltps against a zeroed register) in front of the blend.
; NOTE(review): attribute group #0 is defined outside the visible part of this
; file; its contents are not shown here.
440define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 {
441; AVX12-LABEL: signbit_sel_v4f32_fcmp:
442; AVX12:       # %bb.0:
443; AVX12-NEXT:    vxorps %xmm2, %xmm2, %xmm2
444; AVX12-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
445; AVX12-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
446; AVX12-NEXT:    retq
447;
448; AVX512F-LABEL: signbit_sel_v4f32_fcmp:
449; AVX512F:       # %bb.0:
450; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
451; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
452; AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
453; AVX512F-NEXT:    vcmpltps %zmm2, %zmm0, %k1
454; AVX512F-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
455; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
456; AVX512F-NEXT:    vzeroupper
457; AVX512F-NEXT:    retq
458;
459; AVX512VL-LABEL: signbit_sel_v4f32_fcmp:
460; AVX512VL:       # %bb.0:
461; AVX512VL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
462; AVX512VL-NEXT:    vcmpltps %xmm2, %xmm0, %k1
463; AVX512VL-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
464; AVX512VL-NEXT:    retq
465;
466; XOP-LABEL: signbit_sel_v4f32_fcmp:
467; XOP:       # %bb.0:
468; XOP-NEXT:    vxorps %xmm2, %xmm2, %xmm2
469; XOP-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
470; XOP-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
471; XOP-NEXT:    retq
472  %cmp = fcmp olt <4 x float> %x, zeroinitializer
473  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
474  ret <4 x float> %sel
475}
476
; Select %y where bit 0 of the i64 lane of %x is clear ((x & 1) == 0),
; otherwise %z. The AVX1 and AVX2 runs expect the tested bit to be shifted into
; the sign position (vpsllq $63) and used directly as the vblendvpd control.
; The XOP run expects an additional vpshaq by a splat constant after the
; shift. Both AVX512 runs expect vptestnmq against a broadcast constant into
; %k1 followed by a masked vpblendmq.
477define <4 x i64> @blend_splat1_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
478; AVX1-LABEL: blend_splat1_mask_cond_v4i64:
479; AVX1:       # %bb.0:
480; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm3
481; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
482; AVX1-NEXT:    vpsllq $63, %xmm0, %xmm0
483; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
484; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
485; AVX1-NEXT:    retq
486;
487; AVX2-LABEL: blend_splat1_mask_cond_v4i64:
488; AVX2:       # %bb.0:
489; AVX2-NEXT:    vpsllq $63, %ymm0, %ymm0
490; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
491; AVX2-NEXT:    retq
492;
493; AVX512F-LABEL: blend_splat1_mask_cond_v4i64:
494; AVX512F:       # %bb.0:
495; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
496; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
497; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
498; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
499; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
500; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
501; AVX512F-NEXT:    retq
502;
503; AVX512VL-LABEL: blend_splat1_mask_cond_v4i64:
504; AVX512VL:       # %bb.0:
505; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
506; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
507; AVX512VL-NEXT:    retq
508;
509; XOP-LABEL: blend_splat1_mask_cond_v4i64:
510; XOP:       # %bb.0:
511; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
512; XOP-NEXT:    vpsllq $63, %xmm3, %xmm3
513; XOP-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
514; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
515; XOP-NEXT:    vpsllq $63, %xmm0, %xmm0
516; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
517; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
518; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
519; XOP-NEXT:    retq
520  %a = and <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
521  %c = icmp eq <4 x i64> %a, zeroinitializer
522  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
523  ret <4 x i64> %r
524}
525
; Select %y where bit 0 of the i32 lane of %x is clear, otherwise %z. The AVX1
; and AVX2 runs expect vpslld $31 to move the tested bit into the sign
; position ahead of vblendvps. Both AVX512 runs expect vptestnmd against a
; broadcast constant into %k1 and a masked vpblendmd. The XOP run expects an
; explicit vpand, a vpcomneqd against zero, and then vblendvps.
526define <4 x i32> @blend_splat1_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
527; AVX12-LABEL: blend_splat1_mask_cond_v4i32:
528; AVX12:       # %bb.0:
529; AVX12-NEXT:    vpslld $31, %xmm0, %xmm0
530; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
531; AVX12-NEXT:    retq
532;
533; AVX512F-LABEL: blend_splat1_mask_cond_v4i32:
534; AVX512F:       # %bb.0:
535; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
536; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
537; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
538; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
539; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
540; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
541; AVX512F-NEXT:    vzeroupper
542; AVX512F-NEXT:    retq
543;
544; AVX512VL-LABEL: blend_splat1_mask_cond_v4i32:
545; AVX512VL:       # %bb.0:
546; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
547; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
548; AVX512VL-NEXT:    retq
549;
550; XOP-LABEL: blend_splat1_mask_cond_v4i32:
551; XOP:       # %bb.0:
552; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
553; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
554; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
555; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
556; XOP-NEXT:    retq
557  %a = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
558  %c = icmp eq <4 x i32> %a, zeroinitializer
559  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
560  ret <4 x i32> %r
561}
562
; Select %y where bit 0 of the i16 lane of %x is clear, otherwise %z. The
; AVX1, AVX2 and XOP runs expect a vpsllw $15 / vpsraw $15 pair on the tested
; value (per 128-bit half on AVX1 and XOP), followed by vandnps/vandps/vorps
; (AVX1), vpblendvb (AVX2) or vpcmov (XOP). Both AVX512 runs expect vpand,
; vpcmpeqw against zero, and vpblendvb.
563define <16 x i16> @blend_splat1_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
564; AVX1-LABEL: blend_splat1_mask_cond_v16i16:
565; AVX1:       # %bb.0:
566; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm3
567; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
568; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
569; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
570; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
571; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
572; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
573; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
574; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
575; AVX1-NEXT:    retq
576;
577; AVX2-LABEL: blend_splat1_mask_cond_v16i16:
578; AVX2:       # %bb.0:
579; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
580; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
581; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
582; AVX2-NEXT:    retq
583;
584; AVX512-LABEL: blend_splat1_mask_cond_v16i16:
585; AVX512:       # %bb.0:
586; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
587; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
588; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
589; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
590; AVX512-NEXT:    retq
591;
592; XOP-LABEL: blend_splat1_mask_cond_v16i16:
593; XOP:       # %bb.0:
594; XOP-NEXT:    vpsllw $15, %xmm0, %xmm3
595; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
596; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
597; XOP-NEXT:    vpsllw $15, %xmm0, %xmm0
598; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
599; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
600; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
601; XOP-NEXT:    retq
602  %a = and <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
603  %c = icmp eq <16 x i16> %a, zeroinitializer
604  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
605  ret <16 x i16> %r
606}
607
; Select %y where bit 0 of the i8 lane of %x is clear, otherwise %z. The AVX1
; and AVX2 runs expect a single vpsllw $7 ahead of vpblendvb. Both AVX512 runs
; expect vpand, vpcmpeqb against zero, and vpblendvb. The XOP run expects
; vpand, vpcomneqb against zero, and vpblendvb.
608define <16 x i8> @blend_splat1_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
609; AVX12-LABEL: blend_splat1_mask_cond_v16i8:
610; AVX12:       # %bb.0:
611; AVX12-NEXT:    vpsllw $7, %xmm0, %xmm0
612; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
613; AVX12-NEXT:    retq
614;
615; AVX512-LABEL: blend_splat1_mask_cond_v16i8:
616; AVX512:       # %bb.0:
617; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
618; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
619; AVX512-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
620; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
621; AVX512-NEXT:    retq
622;
623; XOP-LABEL: blend_splat1_mask_cond_v16i8:
624; XOP:       # %bb.0:
625; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
626; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
627; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
628; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
629; XOP-NEXT:    retq
630  %a = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
631  %c = icmp eq <16 x i8> %a, zeroinitializer
632  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
633  ret <16 x i8> %r
634}
635
; Select %y where the top bit of the i64 lane of %x is clear
; ((x & 0x8000000000000000) == 0), otherwise %z. The AVX1 and AVX2 runs expect
; %x to be used directly as the vblendvpd control with no other instruction.
; The AVX512F run expects the constant to be loaded and tested with vptestnmq;
; the AVX512VL run expects a broadcast memory operand instead. The XOP run
; expects vpand, vpcomneqq against zero, and vblendvpd.
636define <2 x i64> @blend_splatmax_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
637; AVX12-LABEL: blend_splatmax_mask_cond_v2i64:
638; AVX12:       # %bb.0:
639; AVX12-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
640; AVX12-NEXT:    retq
641;
642; AVX512F-LABEL: blend_splatmax_mask_cond_v2i64:
643; AVX512F:       # %bb.0:
644; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
645; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
646; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
647; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
648; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
649; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
650; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
651; AVX512F-NEXT:    vzeroupper
652; AVX512F-NEXT:    retq
653;
654; AVX512VL-LABEL: blend_splatmax_mask_cond_v2i64:
655; AVX512VL:       # %bb.0:
656; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %k1
657; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
658; AVX512VL-NEXT:    retq
659;
660; XOP-LABEL: blend_splatmax_mask_cond_v2i64:
661; XOP:       # %bb.0:
662; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
663; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
664; XOP-NEXT:    vpcomneqq %xmm3, %xmm0, %xmm0
665; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
666; XOP-NEXT:    retq
667  %a = and <2 x i64> %x, <i64 9223372036854775808, i64 9223372036854775808>
668  %c = icmp eq <2 x i64> %a, zeroinitializer
669  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
670  ret <2 x i64> %r
671}
672
; Select %y where the top bit of the i32 lane of %x is clear, otherwise %z.
; The AVX1, AVX2 and XOP runs expect %x to be used directly as the vblendvps
; control with no other instruction. Both AVX512 runs expect vptestnmd against
; a broadcast constant into %k1 followed by a masked vpblendmd.
673define <8 x i32> @blend_splatmax_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
674; AVX12-LABEL: blend_splatmax_mask_cond_v8i32:
675; AVX12:       # %bb.0:
676; AVX12-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
677; AVX12-NEXT:    retq
678;
679; AVX512F-LABEL: blend_splatmax_mask_cond_v8i32:
680; AVX512F:       # %bb.0:
681; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
682; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
683; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
684; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
685; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
686; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
687; AVX512F-NEXT:    retq
688;
689; AVX512VL-LABEL: blend_splatmax_mask_cond_v8i32:
690; AVX512VL:       # %bb.0:
691; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1
692; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
693; AVX512VL-NEXT:    retq
694;
695; XOP-LABEL: blend_splatmax_mask_cond_v8i32:
696; XOP:       # %bb.0:
697; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
698; XOP-NEXT:    retq
699  %a = and <8 x i32> %x, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
700  %c = icmp eq <8 x i32> %a, zeroinitializer
701  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
702  ret <8 x i32> %r
703}
704
; Select %y where the top bit of the i16 lane of %x is clear, otherwise %z.
; The AVX1 and AVX2 runs expect vpsraw $15 on %x ahead of vpblendvb. Both
; AVX512 runs expect vpand, vpcmpeqw against zero, and vpblendvb. The XOP run
; expects vpand, vpcomneqw against zero, and vpblendvb.
705define <8 x i16> @blend_splatmax_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
706; AVX12-LABEL: blend_splatmax_mask_cond_v8i16:
707; AVX12:       # %bb.0:
708; AVX12-NEXT:    vpsraw $15, %xmm0, %xmm0
709; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
710; AVX12-NEXT:    retq
711;
712; AVX512-LABEL: blend_splatmax_mask_cond_v8i16:
713; AVX512:       # %bb.0:
714; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
715; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
716; AVX512-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
717; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
718; AVX512-NEXT:    retq
719;
720; XOP-LABEL: blend_splatmax_mask_cond_v8i16:
721; XOP:       # %bb.0:
722; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
723; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
724; XOP-NEXT:    vpcomneqw %xmm3, %xmm0, %xmm0
725; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
726; XOP-NEXT:    retq
727  %a = and <8 x i16> %x, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
728  %c = icmp eq <8 x i16> %a, zeroinitializer
729  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
730  ret <8 x i16> %r
731}
732
; Select %y where the top bit of the i8 lane of %x is clear, otherwise %z.
; The AVX1 and XOP runs expect %x to be split into 128-bit halves, each
; compared against zero with vpcmpgtb, then recombined and blended with
; vandnps/vandps/vorps (AVX1) or vpcmov (XOP). The AVX2 run expects %x to be
; used directly as the vpblendvb control. Both AVX512 runs expect vpand,
; vpcmpeqb against zero, and vpblendvb.
733define <32 x i8> @blend_splatmax_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
734; AVX1-LABEL: blend_splatmax_mask_cond_v32i8:
735; AVX1:       # %bb.0:
736; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
737; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
738; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
739; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
740; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
741; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
742; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
743; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
744; AVX1-NEXT:    retq
745;
746; AVX2-LABEL: blend_splatmax_mask_cond_v32i8:
747; AVX2:       # %bb.0:
748; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
749; AVX2-NEXT:    retq
750;
751; AVX512-LABEL: blend_splatmax_mask_cond_v32i8:
752; AVX512:       # %bb.0:
753; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
754; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
755; AVX512-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
756; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
757; AVX512-NEXT:    retq
758;
759; XOP-LABEL: blend_splatmax_mask_cond_v32i8:
760; XOP:       # %bb.0:
761; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
762; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
763; XOP-NEXT:    vpcmpgtb %xmm3, %xmm4, %xmm3
764; XOP-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
765; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
766; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
767; XOP-NEXT:    retq
768  %a = and <32 x i8> %x, <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>
769  %c = icmp eq <32 x i8> %a, zeroinitializer
770  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
771  ret <32 x i8> %r
772}
773
; 256-bit i64 elements, splat mask of 2 (bit 1): each result element is %y when
; bit 1 of the matching %x element is clear, otherwise %z.
; The AVX1/AVX2 checks below shift left by 62, which moves the tested bit into
; the sign bit, and then use vblendvpd; the AVX512 checks use vptestnmq into a
; mask register instead.
774define <4 x i64> @blend_splat_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
775; AVX1-LABEL: blend_splat_mask_cond_v4i64:
776; AVX1:       # %bb.0:
777; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm3
778; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
779; AVX1-NEXT:    vpsllq $62, %xmm0, %xmm0
780; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
781; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
782; AVX1-NEXT:    retq
783;
784; AVX2-LABEL: blend_splat_mask_cond_v4i64:
785; AVX2:       # %bb.0:
786; AVX2-NEXT:    vpsllq $62, %ymm0, %ymm0
787; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
788; AVX2-NEXT:    retq
789;
790; AVX512F-LABEL: blend_splat_mask_cond_v4i64:
791; AVX512F:       # %bb.0:
792; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
793; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
794; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
795; AVX512F-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %k1
796; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
797; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
798; AVX512F-NEXT:    retq
799;
800; AVX512VL-LABEL: blend_splat_mask_cond_v4i64:
801; AVX512VL:       # %bb.0:
802; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
803; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
804; AVX512VL-NEXT:    retq
805;
806; XOP-LABEL: blend_splat_mask_cond_v4i64:
807; XOP:       # %bb.0:
808; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
809; XOP-NEXT:    vpsllq $62, %xmm3, %xmm3
810; XOP-NEXT:    vmovdqa {{.*#+}} xmm4 = [18446744073709551553,18446744073709551553]
811; XOP-NEXT:    vpshaq %xmm4, %xmm3, %xmm3
812; XOP-NEXT:    vpsllq $62, %xmm0, %xmm0
813; XOP-NEXT:    vpshaq %xmm4, %xmm0, %xmm0
814; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
815; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
816; XOP-NEXT:    retq
817  %a = and <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
818  %c = icmp eq <4 x i64> %a, zeroinitializer
819  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
820  ret <4 x i64> %r
821}
822
; 128-bit i32 elements, splat mask of 65536 (bit 16): each result element is %y
; when bit 16 of the matching %x element is clear, otherwise %z.
; The AVX1/AVX2 checks below shift left by 15 (bit 16 -> sign bit) and then use
; vblendvps; the XOP checks keep an explicit and + vpcomneqd before the blend.
823define <4 x i32> @blend_splat_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
824; AVX12-LABEL: blend_splat_mask_cond_v4i32:
825; AVX12:       # %bb.0:
826; AVX12-NEXT:    vpslld $15, %xmm0, %xmm0
827; AVX12-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
828; AVX12-NEXT:    retq
829;
830; AVX512F-LABEL: blend_splat_mask_cond_v4i32:
831; AVX512F:       # %bb.0:
832; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
833; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
834; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
835; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %k1
836; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
837; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
838; AVX512F-NEXT:    vzeroupper
839; AVX512F-NEXT:    retq
840;
841; AVX512VL-LABEL: blend_splat_mask_cond_v4i32:
842; AVX512VL:       # %bb.0:
843; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
844; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
845; AVX512VL-NEXT:    retq
846;
847; XOP-LABEL: blend_splat_mask_cond_v4i32:
848; XOP:       # %bb.0:
849; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
850; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
851; XOP-NEXT:    vpcomneqd %xmm3, %xmm0, %xmm0
852; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
853; XOP-NEXT:    retq
854  %a = and <4 x i32> %x, <i32 65536, i32 65536, i32 65536, i32 65536>
855  %c = icmp eq <4 x i32> %a, zeroinitializer
856  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
857  ret <4 x i32> %r
858}
859
; 256-bit i16 elements, splat mask of 1024 (bit 10): each result element is %y
; when bit 10 of the matching %x element is clear, otherwise %z.
; The AVX1/AVX2/XOP checks below shift left by 5 (bit 10 -> sign bit) and then
; arithmetic-shift right by 15 to build a full-width element mask before the
; byte blend / bitwise select; the AVX512 checks keep and + vpcmpeqw.
860define <16 x i16> @blend_splat_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
861; AVX1-LABEL: blend_splat_mask_cond_v16i16:
862; AVX1:       # %bb.0:
863; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm3
864; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
865; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
866; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
867; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
868; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
869; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
870; AVX1-NEXT:    vandps %ymm0, %ymm2, %ymm0
871; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
872; AVX1-NEXT:    retq
873;
874; AVX2-LABEL: blend_splat_mask_cond_v16i16:
875; AVX2:       # %bb.0:
876; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
877; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
878; AVX2-NEXT:    vpblendvb %ymm0, %ymm2, %ymm1, %ymm0
879; AVX2-NEXT:    retq
880;
881; AVX512-LABEL: blend_splat_mask_cond_v16i16:
882; AVX512:       # %bb.0:
883; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
884; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
885; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
886; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
887; AVX512-NEXT:    retq
888;
889; XOP-LABEL: blend_splat_mask_cond_v16i16:
890; XOP:       # %bb.0:
891; XOP-NEXT:    vpsllw $5, %xmm0, %xmm3
892; XOP-NEXT:    vpsraw $15, %xmm3, %xmm3
893; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
894; XOP-NEXT:    vpsllw $5, %xmm0, %xmm0
895; XOP-NEXT:    vpsraw $15, %xmm0, %xmm0
896; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
897; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
898; XOP-NEXT:    retq
899  %a = and <16 x i16> %x, <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>
900  %c = icmp eq <16 x i16> %a, zeroinitializer
901  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
902  ret <16 x i16> %r
903}
904
; 128-bit bytes, splat mask of 4 (bit 2): each result byte is %y when bit 2 of
; the matching %x byte is clear, otherwise %z.
; The AVX1/AVX2 checks below use a 16-bit-lane shift left by 5 (bit 2 -> bit 7
; of each byte) followed by vpblendvb; the AVX512 and XOP checks keep an
; explicit and + compare before the blend.
905define <16 x i8> @blend_splat_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
906; AVX12-LABEL: blend_splat_mask_cond_v16i8:
907; AVX12:       # %bb.0:
908; AVX12-NEXT:    vpsllw $5, %xmm0, %xmm0
909; AVX12-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
910; AVX12-NEXT:    retq
911;
912; AVX512-LABEL: blend_splat_mask_cond_v16i8:
913; AVX512:       # %bb.0:
914; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
915; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
916; AVX512-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
917; AVX512-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
918; AVX512-NEXT:    retq
919;
920; XOP-LABEL: blend_splat_mask_cond_v16i8:
921; XOP:       # %bb.0:
922; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
923; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
924; XOP-NEXT:    vpcomneqb %xmm3, %xmm0, %xmm0
925; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
926; XOP-NEXT:    retq
927  %a = and <16 x i8> %x, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
928  %c = icmp eq <16 x i8> %a, zeroinitializer
929  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
930  ret <16 x i8> %r
931}
932
; 128-bit i64 elements, non-splat mask <1, 4> (a different single bit per
; element): each result element is %y when the masked bit of the matching %x
; element is clear, otherwise %z.
; The AVX2 and XOP checks below use a per-element variable shift (vpsllvq /
; vpshlq with a constant-pool operand) followed by vblendvpd; the AVX1 checks
; keep and + vpcmpeqq.
933define <2 x i64> @blend_mask_cond_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
934; AVX1-LABEL: blend_mask_cond_v2i64:
935; AVX1:       # %bb.0:
936; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
937; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
938; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
939; AVX1-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
940; AVX1-NEXT:    retq
941;
942; AVX2-LABEL: blend_mask_cond_v2i64:
943; AVX2:       # %bb.0:
944; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
945; AVX2-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
946; AVX2-NEXT:    retq
947;
948; AVX512F-LABEL: blend_mask_cond_v2i64:
949; AVX512F:       # %bb.0:
950; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
951; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
952; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
953; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,4]
954; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
955; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
956; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
957; AVX512F-NEXT:    vzeroupper
958; AVX512F-NEXT:    retq
959;
960; AVX512VL-LABEL: blend_mask_cond_v2i64:
961; AVX512VL:       # %bb.0:
962; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
963; AVX512VL-NEXT:    vpblendmq %xmm1, %xmm2, %xmm0 {%k1}
964; AVX512VL-NEXT:    retq
965;
966; XOP-LABEL: blend_mask_cond_v2i64:
967; XOP:       # %bb.0:
968; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
969; XOP-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
970; XOP-NEXT:    retq
971  %a = and <2 x i64> %x, <i64 1, i64 4>
972  %c = icmp eq <2 x i64> %a, zeroinitializer
973  %r = select <2 x i1> %c, <2 x i64> %y, <2 x i64> %z
974  ret <2 x i64> %r
975}
976
; 128-bit i32 elements, non-splat mask <65536, 512, 2, 1> (one bit per
; element): each result element is %y when the masked bit of the matching %x
; element is clear, otherwise %z.
; In the checks below the bit test becomes a constant-pool multiply (AVX1
; vpmulld) or a variable shift (AVX2 vpsllvd, XOP vpshld) feeding vblendvps.
977define <4 x i32> @blend_mask_cond_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
978; AVX1-LABEL: blend_mask_cond_v4i32:
979; AVX1:       # %bb.0:
980; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
981; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
982; AVX1-NEXT:    retq
983;
984; AVX2-LABEL: blend_mask_cond_v4i32:
985; AVX2:       # %bb.0:
986; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
987; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
988; AVX2-NEXT:    retq
989;
990; AVX512F-LABEL: blend_mask_cond_v4i32:
991; AVX512F:       # %bb.0:
992; AVX512F-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
993; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
994; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
995; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [65536,512,2,1]
996; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
997; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
998; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
999; AVX512F-NEXT:    vzeroupper
1000; AVX512F-NEXT:    retq
1001;
1002; AVX512VL-LABEL: blend_mask_cond_v4i32:
1003; AVX512VL:       # %bb.0:
1004; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
1005; AVX512VL-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
1006; AVX512VL-NEXT:    retq
1007;
1008; XOP-LABEL: blend_mask_cond_v4i32:
1009; XOP:       # %bb.0:
1010; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1011; XOP-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
1012; XOP-NEXT:    retq
1013  %a = and <4 x i32> %x, <i32 65536, i32 512, i32 2, i32 1>
1014  %c = icmp eq <4 x i32> %a, zeroinitializer
1015  %r = select <4 x i1> %c, <4 x i32> %y, <4 x i32> %z
1016  ret <4 x i32> %r
1017}
1018
; 128-bit i16 elements, non-splat mask with one bit set per element: each
; result element is %y when the masked bit of the matching %x element is
; clear, otherwise %z.
; The AVX-prefixed checks below keep and + vpcmpeqw before vpblendvb; the XOP
; checks use a variable shift (vpshlw) plus a signed less-than-zero compare.
1019define <8 x i16> @blend_mask_cond_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
1020; AVX-LABEL: blend_mask_cond_v8i16:
1021; AVX:       # %bb.0:
1022; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1023; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1024; AVX-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
1025; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1026; AVX-NEXT:    retq
1027;
1028; XOP-LABEL: blend_mask_cond_v8i16:
1029; XOP:       # %bb.0:
1030; XOP-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1031; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1032; XOP-NEXT:    vpcomltw %xmm3, %xmm0, %xmm0
1033; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
1034; XOP-NEXT:    retq
1035  %a = and <8 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 1024, i16 2, i16 4096>
1036  %c = icmp eq <8 x i16> %a, zeroinitializer
1037  %r = select <8 x i1> %c, <8 x i16> %y, <8 x i16> %z
1038  ret <8 x i16> %r
1039}
1040
; 128-bit bytes, non-splat mask with one bit set per byte: each result byte is
; %y when the masked bit of the matching %x byte is clear, otherwise %z.
; The AVX-prefixed checks below keep and + vpcmpeqb before vpblendvb; the XOP
; checks use a variable byte shift (vpshlb) feeding vpblendvb directly.
1041define <16 x i8> @blend_mask_cond_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
1042; AVX-LABEL: blend_mask_cond_v16i8:
1043; AVX:       # %bb.0:
1044; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1045; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1046; AVX-NEXT:    vpcmpeqb %xmm3, %xmm0, %xmm0
1047; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
1048; AVX-NEXT:    retq
1049;
1050; XOP-LABEL: blend_mask_cond_v16i8:
1051; XOP:       # %bb.0:
1052; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1053; XOP-NEXT:    vpblendvb %xmm0, %xmm2, %xmm1, %xmm0
1054; XOP-NEXT:    retq
1055  %a = and <16 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2>
1056  %c = icmp eq <16 x i8> %a, zeroinitializer
1057  %r = select <16 x i1> %c, <16 x i8> %y, <16 x i8> %z
1058  ret <16 x i8> %r
1059}
1060
; 256-bit i64 elements, non-splat mask <2, 4, 32768, 1> (one bit per element):
; each result element is %y when the masked bit of the matching %x element is
; clear, otherwise %z.
; The AVX2 and XOP checks below use variable shifts (vpsllvq / vpshlq) feeding
; vblendvpd; the AVX1 checks keep and + vpcmpeqq on each 128-bit half.
1061define <4 x i64> @blend_mask_cond_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %z) {
1062; AVX1-LABEL: blend_mask_cond_v4i64:
1063; AVX1:       # %bb.0:
1064; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1065; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1066; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1067; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm3, %xmm3
1068; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm0, %xmm0
1069; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1070; AVX1-NEXT:    vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
1071; AVX1-NEXT:    retq
1072;
1073; AVX2-LABEL: blend_mask_cond_v4i64:
1074; AVX2:       # %bb.0:
1075; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1076; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
1077; AVX2-NEXT:    retq
1078;
1079; AVX512F-LABEL: blend_mask_cond_v4i64:
1080; AVX512F:       # %bb.0:
1081; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1082; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1083; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1084; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [2,4,32768,1]
1085; AVX512F-NEXT:    vptestnmq %zmm3, %zmm0, %k1
1086; AVX512F-NEXT:    vpblendmq %zmm1, %zmm2, %zmm0 {%k1}
1087; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1088; AVX512F-NEXT:    retq
1089;
1090; AVX512VL-LABEL: blend_mask_cond_v4i64:
1091; AVX512VL:       # %bb.0:
1092; AVX512VL-NEXT:    vptestnmq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
1093; AVX512VL-NEXT:    vpblendmq %ymm1, %ymm2, %ymm0 {%k1}
1094; AVX512VL-NEXT:    retq
1095;
1096; XOP-LABEL: blend_mask_cond_v4i64:
1097; XOP:       # %bb.0:
1098; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1099; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
1100; XOP-NEXT:    vpshlq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1101; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1102; XOP-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
1103; XOP-NEXT:    retq
1104  %a = and <4 x i64> %x, <i64 2, i64 4, i64 32768, i64 1>
1105  %c = icmp eq <4 x i64> %a, zeroinitializer
1106  %r = select <4 x i1> %c, <4 x i64> %y, <4 x i64> %z
1107  ret <4 x i64> %r
1108}
1109
; 256-bit i32 elements, non-splat mask with one bit set per element: each
; result element is %y when the masked bit of the matching %x element is
; clear, otherwise %z.
; In the checks below the bit test becomes a constant-pool multiply (AVX1
; vpmulld per 128-bit half) or a variable shift (AVX2 vpsllvd, XOP vpshld)
; feeding vblendvps.
1110define <8 x i32> @blend_mask_cond_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
1111; AVX1-LABEL: blend_mask_cond_v8i32:
1112; AVX1:       # %bb.0:
1113; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1114; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
1115; AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1116; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1117; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1118; AVX1-NEXT:    retq
1119;
1120; AVX2-LABEL: blend_mask_cond_v8i32:
1121; AVX2:       # %bb.0:
1122; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1123; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1124; AVX2-NEXT:    retq
1125;
1126; AVX512F-LABEL: blend_mask_cond_v8i32:
1127; AVX512F:       # %bb.0:
1128; AVX512F-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
1129; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
1130; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
1131; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,2,8,4,8,1024,2,4096]
1132; AVX512F-NEXT:    vptestnmd %zmm3, %zmm0, %k1
1133; AVX512F-NEXT:    vpblendmd %zmm1, %zmm2, %zmm0 {%k1}
1134; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
1135; AVX512F-NEXT:    retq
1136;
1137; AVX512VL-LABEL: blend_mask_cond_v8i32:
1138; AVX512VL:       # %bb.0:
1139; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
1140; AVX512VL-NEXT:    vpblendmd %ymm1, %ymm2, %ymm0 {%k1}
1141; AVX512VL-NEXT:    retq
1142;
1143; XOP-LABEL: blend_mask_cond_v8i32:
1144; XOP:       # %bb.0:
1145; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3
1146; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm0
1147; XOP-NEXT:    vpshld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1148; XOP-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1149; XOP-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
1150; XOP-NEXT:    retq
1151  %a = and <8 x i32> %x, <i32 1, i32 2, i32 8, i32 4, i32 8, i32 1024, i32 2, i32 4096>
1152  %c = icmp eq <8 x i32> %a, zeroinitializer
1153  %r = select <8 x i1> %c, <8 x i32> %y, <8 x i32> %z
1154  ret <8 x i32> %r
1155}
1156
; 256-bit i16 elements, non-splat mask with one bit set per element: each
; result element is %y when the masked bit of the matching %x element is
; clear, otherwise %z.
; The AVX1/AVX2/AVX512 checks below all keep and + vpcmpeqw; only the XOP
; checks turn the bit test into a variable shift (vpshlw) plus vpcomltw.
1157define <16 x i16> @blend_mask_cond_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) {
1158; AVX1-LABEL: blend_mask_cond_v16i16:
1159; AVX1:       # %bb.0:
1160; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1161; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1162; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1163; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
1164; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
1165; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1166; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
1167; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
1168; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
1169; AVX1-NEXT:    retq
1170;
1171; AVX2-LABEL: blend_mask_cond_v16i16:
1172; AVX2:       # %bb.0:
1173; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1174; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1175; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
1176; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1177; AVX2-NEXT:    retq
1178;
1179; AVX512-LABEL: blend_mask_cond_v16i16:
1180; AVX512:       # %bb.0:
1181; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1182; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1183; AVX512-NEXT:    vpcmpeqw %ymm3, %ymm0, %ymm0
1184; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1185; AVX512-NEXT:    retq
1186;
1187; XOP-LABEL: blend_mask_cond_v16i16:
1188; XOP:       # %bb.0:
1189; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
1190; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1191; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1192; XOP-NEXT:    vpcomltw %xmm4, %xmm3, %xmm3
1193; XOP-NEXT:    vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1194; XOP-NEXT:    vpcomltw %xmm4, %xmm0, %xmm0
1195; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1196; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
1197; XOP-NEXT:    retq
1198  %a = and <16 x i16> %x, <i16 1, i16 2, i16 8, i16 4, i16 8, i16 2, i16 2, i16 2, i16 2, i16 8, i16 8, i16 64, i16 64, i16 1024, i16 4096, i16 1024>
1199  %c = icmp eq <16 x i16> %a, zeroinitializer
1200  %r = select <16 x i1> %c, <16 x i16> %y, <16 x i16> %z
1201  ret <16 x i16> %r
1202}
1203
; 256-bit bytes, non-splat mask with one bit set per byte: each result byte is
; %y when the masked bit of the matching %x byte is clear, otherwise %z.
; The AVX1/AVX2/AVX512 checks below all keep and + vpcmpeqb; only the XOP
; checks turn the bit test into a variable shift (vpshlb) plus vpcomltb.
1204define <32 x i8> @blend_mask_cond_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %z) {
1205; AVX1-LABEL: blend_mask_cond_v32i8:
1206; AVX1:       # %bb.0:
1207; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1208; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1209; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1210; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
1211; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
1212; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1213; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
1214; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
1215; AVX1-NEXT:    vorps %ymm2, %ymm0, %ymm0
1216; AVX1-NEXT:    retq
1217;
1218; AVX2-LABEL: blend_mask_cond_v32i8:
1219; AVX2:       # %bb.0:
1220; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1221; AVX2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1222; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
1223; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1224; AVX2-NEXT:    retq
1225;
1226; AVX512-LABEL: blend_mask_cond_v32i8:
1227; AVX512:       # %bb.0:
1228; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1229; AVX512-NEXT:    vpxor %xmm3, %xmm3, %xmm3
1230; AVX512-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
1231; AVX512-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
1232; AVX512-NEXT:    retq
1233;
1234; XOP-LABEL: blend_mask_cond_v32i8:
1235; XOP:       # %bb.0:
1236; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
1237; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
1238; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1239; XOP-NEXT:    vpcomltb %xmm4, %xmm3, %xmm3
1240; XOP-NEXT:    vpshlb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1241; XOP-NEXT:    vpcomltb %xmm4, %xmm0, %xmm0
1242; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
1243; XOP-NEXT:    vpcmov %ymm0, %ymm1, %ymm2, %ymm0
1244; XOP-NEXT:    retq
1245  %a = and <32 x i8> %x, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 2, i8 2, i8 2, i8 2, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 4, i8 4, i8 4, i8 4, i8 128, i8 4, i8 2, i8 16>
1246  %c = icmp eq <32 x i8> %a, zeroinitializer
1247  %r = select <32 x i1> %c, <32 x i8> %y, <32 x i8> %z
1248  ret <32 x i8> %r
1249}
1250
; Loads <4 x i32> vectors from %y and %z (align 4), and per element stores to
; %x either (z | y) when bit 0 of the %z element is clear, or (z ^ y) when it
; is set. Unlike the tests above, the select condition is derived from one of
; the values that also feeds both select arms.
; NOTE(review): the name suggests a regression test for bug report PR46531;
; the report itself is not visible here - confirm before relying on that.
; The AVX1/AVX2 checks below shift left by 31 (bit 0 -> sign bit) and use
; vblendvps; the XOP checks keep an explicit and + vpcomneqd.
1251define void @PR46531(ptr %x, ptr %y, ptr %z) {
1252; AVX12-LABEL: PR46531:
1253; AVX12:       # %bb.0:
1254; AVX12-NEXT:    vmovdqu (%rsi), %xmm0
1255; AVX12-NEXT:    vmovdqu (%rdx), %xmm1
1256; AVX12-NEXT:    vpor %xmm0, %xmm1, %xmm2
1257; AVX12-NEXT:    vpxor %xmm0, %xmm1, %xmm0
1258; AVX12-NEXT:    vpslld $31, %xmm1, %xmm1
1259; AVX12-NEXT:    vblendvps %xmm1, %xmm0, %xmm2, %xmm0
1260; AVX12-NEXT:    vmovups %xmm0, (%rdi)
1261; AVX12-NEXT:    retq
1262;
1263; AVX512F-LABEL: PR46531:
1264; AVX512F:       # %bb.0:
1265; AVX512F-NEXT:    vmovdqu (%rsi), %xmm0
1266; AVX512F-NEXT:    vmovdqu (%rdx), %xmm1
1267; AVX512F-NEXT:    vpor %xmm0, %xmm1, %xmm2
1268; AVX512F-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
1269; AVX512F-NEXT:    vpxor %xmm0, %xmm1, %xmm0
1270; AVX512F-NEXT:    vmovdqa32 %zmm2, %zmm0 {%k1}
1271; AVX512F-NEXT:    vmovdqu %xmm0, (%rdi)
1272; AVX512F-NEXT:    vzeroupper
1273; AVX512F-NEXT:    retq
1274;
1275; AVX512VL-LABEL: PR46531:
1276; AVX512VL:       # %bb.0:
1277; AVX512VL-NEXT:    vmovdqu (%rsi), %xmm0
1278; AVX512VL-NEXT:    vmovdqu (%rdx), %xmm1
1279; AVX512VL-NEXT:    vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
1280; AVX512VL-NEXT:    vpxor %xmm0, %xmm1, %xmm2
1281; AVX512VL-NEXT:    vpord %xmm0, %xmm1, %xmm2 {%k1}
1282; AVX512VL-NEXT:    vmovdqu %xmm2, (%rdi)
1283; AVX512VL-NEXT:    retq
1284;
1285; XOP-LABEL: PR46531:
1286; XOP:       # %bb.0:
1287; XOP-NEXT:    vmovdqu (%rsi), %xmm0
1288; XOP-NEXT:    vmovdqu (%rdx), %xmm1
1289; XOP-NEXT:    vpor %xmm0, %xmm1, %xmm2
1290; XOP-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm3
1291; XOP-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1292; XOP-NEXT:    vpcomneqd %xmm4, %xmm3, %xmm3
1293; XOP-NEXT:    vpxor %xmm0, %xmm1, %xmm0
1294; XOP-NEXT:    vblendvps %xmm3, %xmm0, %xmm2, %xmm0
1295; XOP-NEXT:    vmovups %xmm0, (%rdi)
1296; XOP-NEXT:    retq
1297  %a = load <4 x i32>, ptr %y, align 4
1298  %b = load <4 x i32>, ptr %z, align 4
1299  %or = or <4 x i32> %b, %a
1300  %and = and <4 x i32> %b, <i32 1, i32 1, i32 1, i32 1>
1301  %cmp = icmp eq <4 x i32> %and, zeroinitializer
1302  %xor = xor <4 x i32> %b, %a
1303  %sel = select <4 x i1> %cmp, <4 x i32> %or, <4 x i32> %xor
1304  store <4 x i32> %sel, ptr %x, align 4
1305  ret void
1306}
1307
1308attributes #0 = { "no-nans-fp-math"="true" }
1309