; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

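; Logic ops (and/or/xor) applied to pairs of operands that are both
; zero-extended or sign-extended from a narrower vector element type
; (i16 -> i32, i8 -> i16, and i1 -> i32 below). In several cases the
; checked output shows the logic op done on the narrow type followed
; by a single extend.
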
define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

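; Same logic-op-of-extends patterns with <8 x i8> operands extended to <8 x i16>.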
define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

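; Same patterns with boolean <8 x i1> operands extended to <8 x i32>.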
define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}