; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

;
; Unsigned Maximum (GT)
;
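; There is no unsigned vector compare (or 64-bit unsigned max) before AVX512,
; so the ugt cases below flip the sign bit of both operands with PXOR and
; drive the blend/select from a signed pcmpgtd/pcmpgtq. Where a native
; instruction exists it is used directly: pmaxud/pmaxuw on SSE4.1+, pmaxub on
; all SSE levels, the psubusw/paddw saturation trick for v8i16 on plain SSE2,
; and vpmaxuq on AVX512.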

define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: max_gt_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp ugt <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: max_gt_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm8, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm6
; SSE41-NEXT:    pxor %xmm5, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT:    pand %xmm7, %xmm0
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm5, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    movapd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm2, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    movapd %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %1 = icmp ugt <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: max_gt_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxud %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_gt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ugt <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_gt_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxud %xmm2, %xmm0
; SSE41-NEXT:    pmaxud %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxud %xmm2, %xmm0
; SSE42-NEXT:    pmaxud %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ugt <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: max_gt_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubusw %xmm0, %xmm1
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_gt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ugt <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: max_gt_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm2, %xmm0
; SSE2-NEXT:    psubusw %xmm1, %xmm3
; SSE2-NEXT:    paddw %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_gt_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_gt_v16i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxuw %xmm2, %xmm0
; SSE42-NEXT:    pmaxuw %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_gt_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ugt <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: max_gt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ugt <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: max_gt_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxub %xmm2, %xmm0
; SSE-NEXT:    pmaxub %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_gt_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_gt_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_gt_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ugt <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Unsigned Maximum (GE)
;
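; The uge forms pick the same operand as ugt whenever the inputs differ, and
; either operand is acceptable when they are equal, so these tests lower to
; exactly the same code as the GT tests above.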

define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: max_ge_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm2, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm1, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm2, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp uge <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: max_ge_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm8, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm4, %xmm6
; SSE41-NEXT:    pxor %xmm5, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT:    pand %xmm7, %xmm0
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm5
; SSE41-NEXT:    movdqa %xmm5, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    movapd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm2, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm3, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pxor %xmm1, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    movapd %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %1 = icmp uge <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: max_ge_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm0, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxud %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxud %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_ge_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp uge <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_ge_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm2, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm0, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm3, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxud %xmm2, %xmm0
; SSE41-NEXT:    pmaxud %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxud %xmm2, %xmm0
; SSE42-NEXT:    pmaxud %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp uge <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: max_ge_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubusw %xmm0, %xmm1
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxuw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp uge <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: max_ge_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psubusw %xmm0, %xmm2
; SSE2-NEXT:    paddw %xmm2, %xmm0
; SSE2-NEXT:    psubusw %xmm1, %xmm3
; SSE2-NEXT:    paddw %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: max_ge_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmaxuw %xmm2, %xmm0
; SSE41-NEXT:    pmaxuw %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: max_ge_v16i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pmaxuw %xmm2, %xmm0
; SSE42-NEXT:    pmaxuw %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: max_ge_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp uge <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: max_ge_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp uge <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: max_ge_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pmaxub %xmm2, %xmm0
; SSE-NEXT:    pmaxub %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: max_ge_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: max_ge_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: max_ge_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp uge <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Unsigned Minimum (LT)
;
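; Unsigned min mirrors the max lowering: the operands are sign-bit-flipped
; with PXOR and the signed compare operands are swapped so the blend keeps the
; smaller value, or pminud/pminuw/pminub (and vpminuq on AVX512) are used
; directly where a native instruction exists; plain SSE2 handles v8i16 via
; psubusw followed by psubw.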

define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: min_lt_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_lt_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp ult <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: min_lt_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm8, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm2, %xmm6
; SSE41-NEXT:    pxor %xmm5, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT:    pand %xmm7, %xmm0
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm5, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    movapd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm6
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pxor %xmm3, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    movapd %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_lt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %1 = icmp ult <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: min_lt_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminud %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminud %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: min_lt_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ult <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: min_lt_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminud %xmm2, %xmm0
; SSE41-NEXT:    pminud %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminud %xmm2, %xmm0
; SSE42-NEXT:    pminud %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_lt_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ult <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: min_lt_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm2
; SSE2-NEXT:    psubw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminuw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminuw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ult <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: min_lt_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psubusw %xmm2, %xmm4
; SSE2-NEXT:    psubw %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm3, %xmm2
; SSE2-NEXT:    psubw %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_lt_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminuw %xmm2, %xmm0
; SSE41-NEXT:    pminuw %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_lt_v16i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminuw %xmm2, %xmm0
; SSE42-NEXT:    pminuw %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_lt_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ult <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: min_lt_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ult <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: min_lt_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    pminub %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_lt_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_lt_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_lt_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ult <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Unsigned Minimum (LE)
;
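; As with the GE tests, the ule forms produce the same code as the strict ult
; versions, since the select may return either operand when the inputs are
; equal.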

define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: min_le_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT:    pand %xmm5, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm3, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm3, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm2
; SSE42-NEXT:    movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm2, %xmm3
; SSE42-NEXT:    pxor %xmm0, %xmm3
; SSE42-NEXT:    pxor %xmm1, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE42-NEXT:    movapd %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_le_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX2-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp ule <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: min_le_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    movdqa %xmm6, %xmm7
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm7
; SSE2-NEXT:    pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm5, %xmm6
; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT:    pand %xmm8, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT:    por %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm4
; SSE2-NEXT:    movdqa %xmm4, %xmm5
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; SSE2-NEXT:    pcmpeqd %xmm2, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; SSE2-NEXT:    pand %xmm6, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT:    por %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    movdqa %xmm2, %xmm6
; SSE41-NEXT:    pxor %xmm5, %xmm6
; SSE41-NEXT:    movdqa %xmm6, %xmm7
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm7
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm6
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2]
; SSE41-NEXT:    pand %xmm7, %xmm0
; SSE41-NEXT:    por %xmm6, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pxor %xmm5, %xmm0
; SSE41-NEXT:    pxor %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm5, %xmm4
; SSE41-NEXT:    pcmpeqd %xmm0, %xmm4
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm5
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2]
; SSE41-NEXT:    pand %xmm4, %xmm0
; SSE41-NEXT:    por %xmm5, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE41-NEXT:    movapd %xmm2, %xmm0
; SSE41-NEXT:    movapd %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v4i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm4
; SSE42-NEXT:    movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT:    movdqa %xmm0, %xmm6
; SSE42-NEXT:    pxor %xmm5, %xmm6
; SSE42-NEXT:    movdqa %xmm2, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pcmpgtq %xmm6, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm4, %xmm2
; SSE42-NEXT:    movdqa %xmm1, %xmm0
; SSE42-NEXT:    pxor %xmm5, %xmm0
; SSE42-NEXT:    pxor %xmm3, %xmm5
; SSE42-NEXT:    pcmpgtq %xmm0, %xmm5
; SSE42-NEXT:    movdqa %xmm5, %xmm0
; SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
; SSE42-NEXT:    movapd %xmm2, %xmm0
; SSE42-NEXT:    movapd %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_le_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT:    vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vpminuq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512-NEXT:    retq
  %1 = icmp ule <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: min_le_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    pxor %xmm2, %xmm3
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pcmpgtd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    pandn %xmm1, %xmm2
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminud %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v4i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminud %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: min_le_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ule <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: min_le_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    pxor %xmm4, %xmm5
; SSE2-NEXT:    movdqa %xmm2, %xmm6
; SSE2-NEXT:    pxor %xmm4, %xmm6
; SSE2-NEXT:    pcmpgtd %xmm5, %xmm6
; SSE2-NEXT:    pand %xmm6, %xmm0
; SSE2-NEXT:    pandn %xmm2, %xmm6
; SSE2-NEXT:    por %xmm6, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm4, %xmm2
; SSE2-NEXT:    pxor %xmm3, %xmm4
; SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pandn %xmm3, %xmm4
; SSE2-NEXT:    por %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminud %xmm2, %xmm0
; SSE41-NEXT:    pminud %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v8i32:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminud %xmm2, %xmm0
; SSE42-NEXT:    pminud %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_le_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminud %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ule <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: min_le_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psubusw %xmm1, %xmm2
; SSE2-NEXT:    psubw %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminuw %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v8i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminuw %xmm1, %xmm0
; SSE42-NEXT:    retq
;
; AVX-LABEL: min_le_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ule <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: min_le_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm4
; SSE2-NEXT:    psubusw %xmm2, %xmm4
; SSE2-NEXT:    psubw %xmm4, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psubusw %xmm3, %xmm2
; SSE2-NEXT:    psubw %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE41-LABEL: min_le_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pminuw %xmm2, %xmm0
; SSE41-NEXT:    pminuw %xmm3, %xmm1
; SSE41-NEXT:    retq
;
; SSE42-LABEL: min_le_v16i16:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pminuw %xmm2, %xmm0
; SSE42-NEXT:    pminuw %xmm3, %xmm1
; SSE42-NEXT:    retq
;
; AVX1-LABEL: min_le_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ule <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: min_le_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp ule <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: min_le_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    pminub %xmm2, %xmm0
; SSE-NEXT:    pminub %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: min_le_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpminub %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: min_le_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: min_le_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = icmp ule <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Constant Folding
;
1641
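; Every operand below is a vector of immediates, so each icmp+select pair should
; fold to a build-vector constant (typically materialized as a constant-pool
; load). Worked example for max_gt_v2i64c: lane 0 is umax(-7, -1) =
; umax(0xFFFFFFFFFFFFFFF9, 0xFFFFFFFFFFFFFFFF) = 18446744073709551615, and
; lane 1 is umax(7, 1) = 7, matching the [18446744073709551615,7] constant in
; the checks.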
define <2 x i64> @max_gt_v2i64c() {
; SSE-LABEL: max_gt_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551615,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v2i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp ugt <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @max_gt_v4i64c() {
; SSE-LABEL: max_gt_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,7]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v4i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp ugt <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @max_gt_v4i32c() {
; SSE-LABEL: max_gt_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v4i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp ugt <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @max_gt_v8i32c() {
; SSE-LABEL: max_gt_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v8i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp ugt <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @max_gt_v8i16c() {
; SSE-LABEL: max_gt_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp ugt <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @max_gt_v16i16c() {
; SSE-LABEL: max_gt_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp ugt <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @max_gt_v16i8c() {
; SSE-LABEL: max_gt_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp ugt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @max_ge_v2i64c() {
; SSE-LABEL: max_ge_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551615,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v2i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp uge <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @max_ge_v4i64c() {
; SSE-LABEL: max_ge_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,7]
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v4i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp uge <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @max_ge_v4i32c() {
; SSE-LABEL: max_ge_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v4i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp uge <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @max_ge_v8i32c() {
; SSE-LABEL: max_ge_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp uge <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp uge <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp uge <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @max_ge_v16i8c() {
; SSE-LABEL: max_ge_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp uge <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_lt_v2i64c() {
; SSE-LABEL: min_lt_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v2i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp ult <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_lt_v4i64c() {
; SSE-LABEL: min_lt_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v4i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp ult <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_lt_v4i32c() {
; SSE-LABEL: min_lt_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v4i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp ult <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_lt_v8i32c() {
; SSE-LABEL: min_lt_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp ult <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_lt_v8i16c() {
; SSE-LABEL: min_lt_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16  1, i32 0
  %3 = icmp ult <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_lt_v16i16c() {
; SSE-LABEL: min_lt_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16  1, i32 0
  %3 = icmp ult <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_lt_v16i8c() {
; SSE-LABEL: min_lt_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8  1, i32 0
  %3 = icmp ult <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_le_v2i64c() {
; SSE-LABEL: min_le_v2i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v2i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp ule <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_le_v4i64c() {
; SSE-LABEL: min_le_v4i64c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v4i64c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp ule <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_le_v4i32c() {
; SSE-LABEL: min_le_v4i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp ule <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_le_v8i32c() {
; SSE-LABEL: min_le_v8i32c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v8i32c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp ule <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp ule <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i16c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp ule <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_le_v16i8c() {
; SSE-LABEL: min_le_v16i8c:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp ule <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}
