; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2                                   | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2                                 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx                                    | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2                                   | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl                      | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw  | FileCheck %s --check-prefixes=CHECK,AVX,AVX512,AVX512DQBW

; Lower common integer comparisons such as 'isPositive' efficiently:
; https://llvm.org/bugs/show_bug.cgi?id=26701
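;
; The 'isPositive' idiom appears in IR as an arithmetic shift that splats the
; sign bit, followed by a 'not'. A minimal scalar sketch of the equivalence
; (illustration only, not one of the checked tests):
;   %sign = ashr i32 %x, 31    ; -1 if x is negative, 0 otherwise
;   %not  = xor i32 %sign, -1  ; -1 if x > -1, 0 otherwise
; which is the same as sext(icmp sgt i32 %x, -1). The vector tests below
; expect this to lower to a single pcmpgt against an all-ones register rather
; than a shift plus a separate not.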

define <16 x i8> @test_pcmpgtb(<16 x i8> %x) {
; SSE-LABEL: test_pcmpgtb:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <16 x i8> %not
}

define <8 x i16> @test_pcmpgtw(<8 x i16> %x) {
; SSE-LABEL: test_pcmpgtw:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtw:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <8 x i16> %not
}

define <4 x i32> @test_pcmpgtd(<4 x i32> %x) {
; SSE-LABEL: test_pcmpgtd:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %not
}

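; Before SSE4.2 there is no 64-bit pcmpgtq, so the i64 signbit test below is
; expected to splat the high (sign-carrying) dwords with pshufd and compare
; them with a 32-bit pcmpgtd; only the sign dword of each element matters here.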
define <2 x i64> @test_pcmpgtq(<2 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: test_pcmpgtq:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sign = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sign, <i64 -1, i64 -1>
  ret <2 x i64> %not
}

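; Illegal vector types are scalarized, so the <1 x i128> case below should
; become plain scalar shift/not code rather than any vector compare.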
define <1 x i128> @test_strange_type(<1 x i128> %x) {
; CHECK-LABEL: test_strange_type:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    notq %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    retq
  %sign = ashr <1 x i128> %x, <i128 127>
  %not = xor <1 x i128> %sign, <i128 -1>
  ret <1 x i128> %not
}

define <32 x i8> @test_pcmpgtb_256(<32 x i8> %x) {
; SSE-LABEL: test_pcmpgtb_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtb_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtb_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtb_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ret <32 x i8> %not
}

define <16 x i16> @test_pcmpgtw_256(<16 x i16> %x) {
; SSE-LABEL: test_pcmpgtw_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtw %xmm2, %xmm0
; SSE-NEXT:    pcmpgtw %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtw_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtw_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtw_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ret <16 x i16> %not
}

define <8 x i32> @test_pcmpgtd_256(<8 x i32> %x) {
; SSE-LABEL: test_pcmpgtd_256:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtd_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtd_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtd_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ret <8 x i32> %not
}

define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) {
; SSE2-LABEL: test_pcmpgtq_256:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: test_pcmpgtq_256:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: test_pcmpgtq_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_pcmpgtq_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_pcmpgtq_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sign = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sign, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %not
}

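; For zext (rather than sext) of a compare, the all-ones compare result must
; be reduced to 0/1 per element. Where a logical right shift exists for the
; element type, shifting by bitwidth-1 is the expected form, e.g. for i32:
;   pcmpeqd %xmm1, %xmm0    ; 0 / -1 per element
;   psrld   $31, %xmm0      ; 0 /  1 per element
; Element types without a matching shift (v16i8) fall back to masking the
; compare result with a constant-pool vector of 1s instead.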
define <16 x i8> @cmpeq_zext_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: cmpeq_zext_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <16 x i8> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %zext
}

define <16 x i16> @cmpeq_zext_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE-LABEL: cmpeq_zext_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
; SSE-NEXT:    psrlw $15, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpeq_zext_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp eq <16 x i16> %a, %b
  %zext = zext <16 x i1> %cmp to <16 x i16>
  ret <16 x i16> %zext
}

define <4 x i32> @cmpeq_zext_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: cmpeq_zext_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_zext_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp eq <4 x i32> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %zext
}

define <4 x i64> @cmpeq_zext_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: cmpeq_zext_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [1,1]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpeq_zext_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    pcmpeqq %xmm3, %xmm1
; SSE42-NEXT:    psrlq $63, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpeq_zext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpeq_zext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpeq_zext_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp eq <4 x i64> %a, %b
  %zext = zext <4 x i1> %cmp to <4 x i64>
  ret <4 x i64> %zext
}

define <32 x i8> @cmpgt_zext_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: cmpgt_zext_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE-NEXT:    pand %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpgt_zext_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %a, %b
  %zext = zext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %zext
}

define <8 x i16> @cmpgt_zext_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: cmpgt_zext_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %zext
}

define <8 x i32> @cmpgt_zext_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: cmpgt_zext_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpgt_zext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpgt_zext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpgt_zext_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %a, %b
  %zext = zext <8 x i1> %cmp to <8 x i32>
  ret <8 x i32> %zext
}

define <2 x i64> @cmpgt_zext_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: cmpgt_zext_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-NEXT:    pxor %xmm2, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpgt_zext_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    psrlq $63, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: cmpgt_zext_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %zext = zext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %zext
}

; Test that we optimize a zext of a vector setcc ne zero where all bits but the
; lsb are known to be zero.
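; Since lshr by 15 leaves only the lsb possibly set, 'icmp ne %a, 0' is exactly
; the lsb itself, so the compare should fold away and only the shift (plus the
; widening zext) should remain in the output below.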
define <8 x i32> @cmpne_knownzeros_zext_v8i16_v8i32(<8 x i16> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $15, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    psrlw $15, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; SSE42-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i16_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    retq
  %a = lshr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %b = icmp ne <8 x i16> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i32> @cmpne_knownzeros_zext_v8i32_v8i32(<8 x i32> %x) {
; SSE-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    psrld $31, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i32>
  ret <8 x i32> %c
}

define <8 x i16> @cmpne_knownzeros_zext_v8i32_v8i16(<8 x i32> %x) {
; SSE2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $31, %xmm1
; SSE2-NEXT:    psrld $31, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    psrld $31, %xmm1
; SSE42-NEXT:    psrld $31, %xmm0
; SSE42-NEXT:    packusdw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: cmpne_knownzeros_zext_v8i32_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %a = lshr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %b = icmp ne <8 x i32> %a, zeroinitializer
  %c = zext <8 x i1> %b to <8 x i16>
  ret <8 x i16> %c
}

; PR26697
define <4 x i32> @cmpeq_one_mask_bit(<4 x i32> %mask) {
; SSE-LABEL: cmpeq_one_mask_bit:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmpeq_one_mask_bit:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %mask_signbit = and <4 x i32> %mask, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %mask_bool = icmp ne <4 x i32> %mask_signbit, zeroinitializer
  %mask_bool_ext = sext <4 x i1> %mask_bool to <4 x i32>
  ret <4 x i32> %mask_bool_ext
}

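; not_signbit_mask: and with the inverted signbit splat, (~(x >>s bw-1)) & y,
; which keeps y only where x is non-negative. Where a per-element arithmetic
; shift exists this should lower to psra + pandn; for i8 (no byte psra) and
; for i64 before AVX-512 (no psraq), a pcmpgt against all-ones produces the
; inverted mask directly and a plain pand is used instead.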
define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: not_signbit_mask_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: not_signbit_mask_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %sh = ashr <2 x i64> %x, <i64 63, i64 63>
  %not = xor <2 x i64> %sh, <i64 -1, i64 -1>
  %and = and <2 x i64> %y, %not
  ret <2 x i64> %and
}

define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: not_signbit_mask_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
  %not = xor <4 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <4 x i32> %not, %y
  ret <4 x i32> %and
}

define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: not_signbit_mask_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh = ashr <8 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <8 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <8 x i16> %y, %not
  ret <8 x i16> %and
}

define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: not_signbit_mask_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: not_signbit_mask_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sh = ashr <16 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <16 x i8> %not, %y
  ret <16 x i8> %and
}

define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: not_signbit_mask_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: not_signbit_mask_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sh = ashr <4 x i64> %x, <i64 63, i64 63, i64 63, i64 63>
  %not = xor <4 x i64> %sh, <i64 -1, i64 -1, i64 -1, i64 -1>
  %and = and <4 x i64> %y, %not
  ret <4 x i64> %and
}

define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; SSE-LABEL: not_signbit_mask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sh = ashr <8 x i32> %x, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %not = xor <8 x i32> %sh, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <8 x i32> %not, %y
  ret <8 x i32> %and
}

define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
; SSE-LABEL: not_signbit_mask_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sh = ashr <16 x i16> %x, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %not = xor <16 x i16> %sh, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <16 x i16> %y, %not
  ret <16 x i16> %and
}

define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
; SSE-LABEL: not_signbit_mask_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: not_signbit_mask_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: not_signbit_mask_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: not_signbit_mask_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %sh = ashr <32 x i8> %x, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  %not = xor <32 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <32 x i8> %not, %y
  ret <32 x i8> %and
}

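; The same masks written as a canonical compare, sext(icmp sgt x, -1) & y.
; These should lower identically to the ashr/xor forms above.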
define <2 x i64> @ispositive_mask_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-LABEL: ispositive_mask_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: ispositive_mask_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: ispositive_mask_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ispositive_mask_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ispositive_mask_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  %and = and <2 x i64> %mask, %y
  ret <2 x i64> %and
}

define <4 x i32> @is_positive_mask_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: is_positive_mask_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  %and = and <4 x i32> %y, %mask
  ret <4 x i32> %and
}

define <8 x i16> @is_positive_mask_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE-LABEL: is_positive_mask_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <8 x i1> %cmp to <8 x i16>
  %and = and <8 x i16> %mask, %y
  ret <8 x i16> %and
}

define <16 x i8> @is_positive_mask_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: is_positive_mask_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <16 x i1> %cmp to <16 x i8>
  %and = and <16 x i8> %y, %mask
  ret <16 x i8> %and
}

define <4 x i64> @is_positive_mask_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE2-LABEL: is_positive_mask_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pand %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    pand %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %mask = sext <4 x i1> %cmp to <4 x i64>
  %and = and <4 x i64> %mask, %y
  ret <4 x i64> %and
}

define <8 x i32> @is_positive_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; SSE-LABEL: is_positive_mask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <8 x i1> %cmp to <8 x i32>
  %and = and <8 x i32> %y, %mask
  ret <8 x i32> %and
}

define <16 x i16> @is_positive_mask_v16i16(<16 x i16> %x, <16 x i16> %y) {
; SSE-LABEL: is_positive_mask_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm2, %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    pandn %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpandn %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <16 x i1> %cmp to <16 x i16>
  %and = and <16 x i16> %mask, %y
  ret <16 x i16> %and
}

define <32 x i8> @is_positive_mask_v32i8(<32 x i8> %x, <32 x i8> %y) {
; SSE-LABEL: is_positive_mask_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE-NEXT:    pcmpgtb %xmm4, %xmm1
; SSE-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    pand %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <32 x i1> %cmp to <32 x i8>
  %and = and <32 x i8> %y, %mask
  ret <32 x i8> %and
}

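; As above, but with the mask operand loaded from memory; the load is expected
; to fold into the pand/pandn (or vandps) as a memory operand.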
define <2 x i64> @ispositive_mask_load_v2i64(<2 x i64> %x, ptr %p) {
; SSE2-LABEL: ispositive_mask_load_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pand (%rdi), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: ispositive_mask_load_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE42-NEXT:    pand (%rdi), %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: ispositive_mask_load_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ispositive_mask_load_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ispositive_mask_load_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %xmm0, %xmm0
; AVX512-NEXT:    vpandn (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
  %mask = sext <2 x i1> %cmp to <2 x i64>
  %y = load <2 x i64>, ptr %p
  %and = and <2 x i64> %mask, %y
  ret <2 x i64> %and
}

define <4 x i32> @is_positive_mask_load_v4i32(<4 x i32> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_load_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpandn (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <4 x i1> %cmp to <4 x i32>
  %y = load <4 x i32>, ptr %p
  %and = and <4 x i32> %y, %mask
  ret <4 x i32> %and
}

define <8 x i16> @is_positive_mask_load_v8i16(<8 x i16> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_load_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpandn (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <8 x i1> %cmp to <8 x i16>
  %y = load <8 x i16>, ptr %p
  %and = and <8 x i16> %mask, %y
  ret <8 x i16> %and
}

define <16 x i8> @is_positive_mask_load_v16i8(<16 x i8> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE-NEXT:    pand (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: is_positive_mask_load_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpand (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <16 x i1> %cmp to <16 x i8>
  %y = load <16 x i8>, ptr %p
  %and = and <16 x i8> %y, %mask
  ret <16 x i8> %and
}

define <4 x i64> @is_positive_mask_load_v4i64(<4 x i64> %x, ptr %p) {
; SSE2-LABEL: is_positive_mask_load_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE2-NEXT:    pand (%rdi), %xmm0
; SSE2-NEXT:    pand 16(%rdi), %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_load_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE42-NEXT:    pand (%rdi), %xmm0
; SSE42-NEXT:    pand 16(%rdi), %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_load_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_load_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_load_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraq $63, %ymm0, %ymm0
; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %mask = sext <4 x i1> %cmp to <4 x i64>
  %y = load <4 x i64>, ptr %p
  %and = and <4 x i64> %mask, %y
  ret <4 x i64> %and
}

define <8 x i32> @is_positive_mask_load_v8i32(<8 x i32> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn (%rdi), %xmm0
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pandn 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_load_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_load_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpandn (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_load_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %mask = sext <8 x i1> %cmp to <8 x i32>
  %y = load <8 x i32>, ptr %p
  %and = and <8 x i32> %y, %mask
  ret <8 x i32> %and
}

define <16 x i16> @is_positive_mask_load_v16i16(<16 x i16> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn (%rdi), %xmm0
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    pandn 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_load_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_load_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpandn (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_load_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX512-NEXT:    vpandn (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %mask = sext <16 x i1> %cmp to <16 x i16>
  %y = load <16 x i16>, ptr %p
  %and = and <16 x i16> %mask, %y
  ret <16 x i16> %and
}

define <32 x i8> @is_positive_mask_load_v32i8(<32 x i8> %x, ptr %p) {
; SSE-LABEL: is_positive_mask_load_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm1
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand (%rdi), %xmm0
; SSE-NEXT:    pand 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_load_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vandps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_load_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpand (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: is_positive_mask_load_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpand (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %mask = sext <32 x i1> %cmp to <32 x i8>
  %y = load <32 x i8>, ptr %p
  %and = and <32 x i8> %y, %mask
  ret <32 x i8> %and
}

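; Finally, the same pattern with an illegal <N x i1> result. Pre-AVX-512 the
; i1 vector stays in a vector register as a 0/-1 mask; with AVX-512 the
; compare produces a k-register predicate, so the incoming i1 vector must
; first be re-sign-extended (shift the bit to the msb, then test or compare
; against zero) before it can be combined in a mask register.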
1404define <2 x i1> @ispositive_mask_v2i64_v2i1(<2 x i64> %x, <2 x i1> %y) {
1405; SSE2-LABEL: ispositive_mask_v2i64_v2i1:
1406; SSE2:       # %bb.0:
1407; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1408; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
1409; SSE2-NEXT:    pcmpgtd %xmm2, %xmm0
1410; SSE2-NEXT:    pand %xmm1, %xmm0
1411; SSE2-NEXT:    retq
1412;
1413; SSE42-LABEL: ispositive_mask_v2i64_v2i1:
1414; SSE42:       # %bb.0:
1415; SSE42-NEXT:    pcmpeqd %xmm2, %xmm2
1416; SSE42-NEXT:    pcmpgtq %xmm2, %xmm0
1417; SSE42-NEXT:    pand %xmm1, %xmm0
1418; SSE42-NEXT:    retq
1419;
1420; AVX1-LABEL: ispositive_mask_v2i64_v2i1:
1421; AVX1:       # %bb.0:
1422; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1423; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
1424; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1425; AVX1-NEXT:    retq
1426;
1427; AVX2-LABEL: ispositive_mask_v2i64_v2i1:
1428; AVX2:       # %bb.0:
1429; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1430; AVX2-NEXT:    vpcmpgtq %xmm2, %xmm0, %xmm0
1431; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1432; AVX2-NEXT:    retq
1433;
1434; AVX512F-LABEL: ispositive_mask_v2i64_v2i1:
1435; AVX512F:       # %bb.0:
1436; AVX512F-NEXT:    vpsllq $63, %xmm1, %xmm1
1437; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1438; AVX512F-NEXT:    vpcmpgtq %xmm2, %xmm0, %k1
1439; AVX512F-NEXT:    vptestmq %xmm1, %xmm1, %k1 {%k1}
1440; AVX512F-NEXT:    vmovdqa64 %xmm2, %xmm0 {%k1} {z}
1441; AVX512F-NEXT:    retq
1442;
1443; AVX512DQBW-LABEL: ispositive_mask_v2i64_v2i1:
1444; AVX512DQBW:       # %bb.0:
1445; AVX512DQBW-NEXT:    vpsllq $63, %xmm1, %xmm1
1446; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1447; AVX512DQBW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
1448; AVX512DQBW-NEXT:    vpcmpgtq %xmm3, %xmm0, %k1
1449; AVX512DQBW-NEXT:    vpcmpgtq %xmm1, %xmm2, %k0 {%k1}
1450; AVX512DQBW-NEXT:    vpmovm2q %k0, %xmm0
1451; AVX512DQBW-NEXT:    retq
1452  %cmp = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
1453  %and = and <2 x i1> %cmp, %y
1454  ret <2 x i1> %and
1455}
1456
1457define <4 x i1> @is_positive_mask_v4i32_v4i1(<4 x i32> %x, <4 x i1> %y) {
1458; SSE-LABEL: is_positive_mask_v4i32_v4i1:
1459; SSE:       # %bb.0:
1460; SSE-NEXT:    psrad $31, %xmm0
1461; SSE-NEXT:    pandn %xmm1, %xmm0
1462; SSE-NEXT:    retq
1463;
1464; AVX1-LABEL: is_positive_mask_v4i32_v4i1:
1465; AVX1:       # %bb.0:
1466; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1467; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
1468; AVX1-NEXT:    retq
1469;
1470; AVX2-LABEL: is_positive_mask_v4i32_v4i1:
1471; AVX2:       # %bb.0:
1472; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1473; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
1474; AVX2-NEXT:    retq
1475;
1476; AVX512F-LABEL: is_positive_mask_v4i32_v4i1:
1477; AVX512F:       # %bb.0:
1478; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
1479; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1480; AVX512F-NEXT:    vpcmpgtd %xmm2, %xmm0, %k1
1481; AVX512F-NEXT:    vptestmd %xmm1, %xmm1, %k1 {%k1}
1482; AVX512F-NEXT:    vmovdqa32 %xmm2, %xmm0 {%k1} {z}
1483; AVX512F-NEXT:    retq
1484;
1485; AVX512DQBW-LABEL: is_positive_mask_v4i32_v4i1:
1486; AVX512DQBW:       # %bb.0:
1487; AVX512DQBW-NEXT:    vpslld $31, %xmm1, %xmm1
1488; AVX512DQBW-NEXT:    vpmovd2m %xmm1, %k1
1489; AVX512DQBW-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
1490; AVX512DQBW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
1491; AVX512DQBW-NEXT:    vpmovm2d %k0, %xmm0
1492; AVX512DQBW-NEXT:    retq
1493  %cmp = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
1494  %and = and <4 x i1> %y, %cmp
1495  ret <4 x i1> %and
1496}
1497
define <8 x i1> @is_positive_mask_v8i16_v8i1(<8 x i16> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i16_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pandn %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v8i16_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v8i16_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i16_v8i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX512DQBW-NEXT:    vpcmpgtw %xmm3, %xmm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtw %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <8 x i1> %cmp, %y
  ret <8 x i1> %and
}
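; There is no arithmetic shift for bytes, so the byte case compares against an
; all-ones vector (materialized with 'pcmpeqd') instead of using the sign bit.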
define <16 x i1> @is_positive_mask_v16i8_v16i1(<16 x i8> %x, <16 x i1> %y) {
; SSE-LABEL: is_positive_mask_v16i8_v16i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm2
; SSE-NEXT:    pcmpgtb %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v16i8_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v16i8_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpgtb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v16i8_v16i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpmovb2m %xmm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %xmm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <16 x i1> %y, %cmp
  ret <16 x i1> %and
}
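; 256-bit source, v4i1 result: SSE2 has no 64-bit element compare ('pcmpgtq' is
; SSE4.2), so the sign dwords of all four elements are gathered with 'shufps'
; and the mask is formed as v4i32.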
define <4 x i1> @is_positive_mask_v4i64_v4i1(<4 x i64> %x, <4 x i1> %y) {
; SSE2-LABEL: is_positive_mask_v4i64_v4i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_v4i64_v4i1:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE42-NEXT:    andps %xmm2, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v4i64_v4i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v4i64_v4i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtq %ymm2, %ymm0, %k1
; AVX512F-NEXT:    vptestmd %xmm1, %xmm1, %k1 {%k1}
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v4i64_v4i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQBW-NEXT:    vpcmpgtq %ymm3, %ymm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtd %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2d %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  %and = and <4 x i1> %cmp, %y
  ret <4 x i1> %and
}
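; Compare each 128-bit half and narrow the dword masks with 'packssdw'.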
define <8 x i1> @is_positive_mask_v8i32_v8i1(<8 x i32> %x, <8 x i1> %y) {
; SSE-LABEL: is_positive_mask_v8i32_v8i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm3, %xmm0
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v8i32_v8i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v8i32_v8i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwd %xmm1, %ymm1
; AVX512F-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtd %ymm2, %ymm0, %k1
; AVX512F-NEXT:    vptestmd %ymm1, %ymm1, %k1 {%k1}
; AVX512F-NEXT:    vmovdqa32 %ymm2, %ymm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v8i32_v8i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $15, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpmovw2m %xmm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <8 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %and = and <8 x i1> %y, %cmp
  ret <8 x i1> %and
}
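; Same as above one element size down: the word masks narrow via 'packsswb'.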
define <16 x i1> @is_positive_mask_v16i16_v16i1(<16 x i16> %x, <16 x i1> %y) {
; SSE-LABEL: is_positive_mask_v16i16_v16i1:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE-NEXT:    pcmpgtw %xmm3, %xmm1
; SSE-NEXT:    pcmpgtw %xmm3, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v16i16_v16i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v16i16_v16i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v16i16_v16i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v16i16_v16i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %xmm1, %xmm1
; AVX512DQBW-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512DQBW-NEXT:    vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512DQBW-NEXT:    vpcmpgtw %ymm3, %ymm0, %k1
; AVX512DQBW-NEXT:    vpcmpgtb %xmm1, %xmm2, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %xmm0
; AVX512DQBW-NEXT:    vzeroupper
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <16 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <16 x i1> %cmp, %y
  ret <16 x i1> %and
}
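; On SSE targets a <32 x i1> is not passed in a vector register: %y arrives as
; 32 scalar bits (GPRs plus stack) that must be reassembled, and the packed
; result is stored through %rdi.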
define <32 x i1> @is_positive_mask_v32i8_v32i1(<32 x i8> %x, <32 x i1> %y) {
; SSE2-LABEL: is_positive_mask_v32i8_v32i1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %rax
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd %r9d, %xmm4
; SSE2-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT:    movd %r8d, %xmm2
; SSE2-NEXT:    movd %ecx, %xmm3
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT:    movd %edx, %xmm6
; SSE2-NEXT:    movd %esi, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm6[0]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pcmpgtb %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm7, %xmm1
; SSE2-NEXT:    psllw $7, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-NEXT:    shll $16, %ecx
; SSE2-NEXT:    psllw $7, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %edx
; SSE2-NEXT:    orl %ecx, %edx
; SSE2-NEXT:    movl %edx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: is_positive_mask_v32i8_v32i1:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movq %rdi, %rax
; SSE42-NEXT:    movd %esi, %xmm2
; SSE42-NEXT:    pinsrb $1, %edx, %xmm2
; SSE42-NEXT:    pinsrb $2, %ecx, %xmm2
; SSE42-NEXT:    pinsrb $3, %r8d, %xmm2
; SSE42-NEXT:    pinsrb $4, %r9d, %xmm2
; SSE42-NEXT:    pinsrb $5, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm2
; SSE42-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE42-NEXT:    pinsrb $1, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $2, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $3, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $4, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $5, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $6, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $7, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $8, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $9, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $10, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $11, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $12, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $13, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $14, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pinsrb $15, {{[0-9]+}}(%rsp), %xmm3
; SSE42-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE42-NEXT:    pcmpgtb %xmm4, %xmm1
; SSE42-NEXT:    pand %xmm3, %xmm1
; SSE42-NEXT:    pcmpgtb %xmm4, %xmm0
; SSE42-NEXT:    pand %xmm2, %xmm0
; SSE42-NEXT:    psllw $7, %xmm0
; SSE42-NEXT:    pmovmskb %xmm0, %ecx
; SSE42-NEXT:    psllw $7, %xmm1
; SSE42-NEXT:    pmovmskb %xmm1, %edx
; SSE42-NEXT:    shll $16, %edx
; SSE42-NEXT:    orl %ecx, %edx
; SSE42-NEXT:    movl %edx, (%rdi)
; SSE42-NEXT:    retq
;
; AVX1-LABEL: is_positive_mask_v32i8_v32i1:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: is_positive_mask_v32i8_v32i1:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512DQBW-LABEL: is_positive_mask_v32i8_v32i1:
; AVX512DQBW:       # %bb.0:
; AVX512DQBW-NEXT:    vpsllw $7, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpmovb2m %ymm1, %k1
; AVX512DQBW-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512DQBW-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
; AVX512DQBW-NEXT:    vpmovm2b %k0, %ymm0
; AVX512DQBW-NEXT:    retq
  %cmp = icmp sgt <32 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %and = and <32 x i1> %y, %cmp
  ret <32 x i1> %and
}
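; PR52504: https://llvm.org/PR52504
; 'select (icmp sgt x, -1), x, -1' is smax(x, -1); with AVX512 this lowers
; directly to 'vpmaxsq' against an all-ones vector.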
define <4 x i64> @PR52504(<4 x i16> %t3) {
; SSE2-LABEL: PR52504:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
; SSE2-NEXT:    pcmpgtd %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm4, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: PR52504:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE42-NEXT:    pmovsxwq %xmm1, %xmm2
; SSE42-NEXT:    pmovsxwq %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    pxor %xmm0, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    por %xmm3, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm2, %xmm1
; SSE42-NEXT:    por %xmm2, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: PR52504:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm2, %xmm3
; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm2, %xmm2
; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR52504:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR52504:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxwq %xmm0, %ymm0
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %t14 = sext <4 x i16> %t3 to <4 x i64>
  %t15 = icmp sgt <4 x i64> %t14, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t16 = select <4 x i1> %t15, <4 x i64> %t14, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %t16
}