; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BWVL

12define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
13; SSE-LABEL: trunc8i64_8i32:
14; SSE:       # BB#0: # %entry
15; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
16; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
17; SSE-NEXT:    movaps %xmm2, %xmm1
18; SSE-NEXT:    retq
19;
20; AVX1-LABEL: trunc8i64_8i32:
21; AVX1:       # BB#0: # %entry
22; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
23; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
24; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
25; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
26; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
27; AVX1-NEXT:    retq
28;
29; AVX2-LABEL: trunc8i64_8i32:
30; AVX2:       # BB#0: # %entry
31; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
32; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
33; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
34; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
35; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
36; AVX2-NEXT:    retq
37;
38; AVX512-LABEL: trunc8i64_8i32:
39; AVX512:       # BB#0: # %entry
40; AVX512-NEXT:    vpmovqd %zmm0, %ymm0
41; AVX512-NEXT:    retq
42entry:
43  %0 = trunc <8 x i64> %a to <8 x i32>
44  ret <8 x i32> %0
45}
46
; Truncate <8 x i64> -> <8 x i16>, result returned in a vector register.
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
; SSSE3-NEXT:    movapd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    packusdw %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovqw %zmm0, %xmm0
; AVX512-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncate <8 x i64> -> <8 x i8> and store the result (store to undef keeps
; the truncation live without needing a return value).
define void @trunc8i64_8i8(<8 x i64> %a) {
; SSE-LABEL: trunc8i64_8i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    packuswb %xmm0, %xmm0
; SSE-NEXT:    movq %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc8i64_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i64_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc8i64_8i8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovqb %zmm0, (%rax)
; AVX512-NEXT:    retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncate <8 x i32> -> <8 x i16>, result returned in a vector register.
define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i16:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i16:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i16:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

; Truncate <8 x i32> -> <8 x i8> and store the result (store to undef keeps
; the truncation live without needing a return value).
define void @trunc8i32_8i8(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movq %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i32_8i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movq %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i32_8i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT:    movq %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc8i32_8i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovq %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc8i32_8i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX2-NEXT:    vmovq %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc8i32_8i8:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512F-NEXT:    vmovq %xmm0, (%rax)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i32_8i8:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, (%rax)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i32_8i8:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovdb %ymm0, (%rax)
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i8>
  store <8 x i8> %0, <8 x i8>* undef, align 4
  ret void
}

; Truncate <16 x i32> -> <16 x i16> and store the result.
define void @trunc16i32_16i16(<16 x i32> %a) {
; SSE2-LABEL: trunc16i32_16i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pslld $16, %xmm1
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    pslld $16, %xmm0
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    pslld $16, %xmm3
; SSE2-NEXT:    psrad $16, %xmm3
; SSE2-NEXT:    pslld $16, %xmm2
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    packssdw %xmm3, %xmm2
; SSE2-NEXT:    movdqu %xmm2, (%rax)
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc16i32_16i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pslld $16, %xmm1
; SSSE3-NEXT:    psrad $16, %xmm1
; SSSE3-NEXT:    pslld $16, %xmm0
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSSE3-NEXT:    pslld $16, %xmm3
; SSSE3-NEXT:    psrad $16, %xmm3
; SSSE3-NEXT:    pslld $16, %xmm2
; SSSE3-NEXT:    psrad $16, %xmm2
; SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i32_16i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pxor %xmm4, %xmm4
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm1, %xmm0
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
; SSE41-NEXT:    packusdw %xmm3, %xmm2
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i16:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
; AVX1-NEXT:    vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i16:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i16:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovdw %zmm0, (%rax)
; AVX512-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, <16 x i16>* undef, align 4
  ret void
}

; Truncate <16 x i32> -> <16 x i8> and store the result.
define void @trunc16i32_16i8(<16 x i32> %a) {
; SSE-LABEL: trunc16i32_16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    packuswb %xmm3, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    packuswb %xmm2, %xmm0
; SSE-NEXT:    movdqu %xmm0, (%rax)
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc16i32_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vandps %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i32_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc16i32_16i8:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vpmovdb %zmm0, (%rax)
; AVX512-NEXT:    retq
entry:
  %0 = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

;PR25684
; Truncate <16 x i16> -> <16 x i8> and store the result.
define void @trunc16i16_16i8(<16 x i16> %a) {
; SSE2-LABEL: trunc16i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc16i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc16i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc16i16_16i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vmovdqu %xmm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc16i16_16i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovdqu %xmm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc16i16_16i8:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i16_16i8:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vmovdqu %xmm0, (%rax)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i16_16i8:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %ymm0, (%rax)
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <16 x i16> %a to <16 x i8>
  store <16 x i8> %0, <16 x i8>* undef, align 4
  ret void
}

; Truncate <32 x i16> -> <32 x i8> and store the result.
define void @trunc32i16_32i8(<32 x i16> %a) {
; SSE2-LABEL: trunc32i16_32i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm4, %xmm1
; SSE2-NEXT:    pand %xmm4, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    pand %xmm4, %xmm3
; SSE2-NEXT:    pand %xmm4, %xmm2
; SSE2-NEXT:    packuswb %xmm3, %xmm2
; SSE2-NEXT:    movdqu %xmm2, (%rax)
; SSE2-NEXT:    movdqu %xmm0, (%rax)
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc32i16_32i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    pshufb %xmm4, %xmm3
; SSSE3-NEXT:    pshufb %xmm4, %xmm2
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSSE3-NEXT:    movdqu %xmm2, (%rax)
; SSSE3-NEXT:    movdqu %xmm0, (%rax)
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc32i16_32i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm4, %xmm1
; SSE41-NEXT:    pshufb %xmm4, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    pshufb %xmm4, %xmm3
; SSE41-NEXT:    pshufb %xmm4, %xmm2
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE41-NEXT:    movdqu %xmm2, (%rax)
; SSE41-NEXT:    movdqu %xmm0, (%rax)
; SSE41-NEXT:    retq
;
; AVX1-LABEL: trunc32i16_32i8:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovups %ymm0, (%rax)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc32i16_32i8:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
; AVX2-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, (%rax)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc32i16_32i8:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc32i16_32i8:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vpmovsxwd %ymm1, %zmm1
; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqu %ymm0, (%rax)
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc32i16_32i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc32i16_32i8:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %zmm0, (%rax)
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <32 x i16> %a to <32 x i8>
  store <32 x i8> %0, <32 x i8>* undef, align 4
  ret void
}

; Two <4 x i64> -> <4 x i32> truncations concatenated into one <8 x i32>.
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: trunc2x4i64_8i32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
; SSE-NEXT:    movaps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1:       # BB#0: # %entry
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2:       # BB#0: # %entry
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: trunc2x4i64_8i32:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x4i64_8i32:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT:    vpmovqd %ymm1, %xmm1
; AVX512VL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x4i64_8i32:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT:    vpmovqd %ymm1, %xmm1
; AVX512BWVL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

729define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
730; SSE2-LABEL: trunc2x4i64_8i16:
731; SSE2:       # BB#0: # %entry
732; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
733; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
734; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
735; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
736; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
737; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
738; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
739; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
740; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
741; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
742; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
743; SSE2-NEXT:    movapd %xmm2, %xmm0
744; SSE2-NEXT:    retq
745;
746; SSSE3-LABEL: trunc2x4i64_8i16:
747; SSSE3:       # BB#0: # %entry
748; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
749; SSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
750; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
751; SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
752; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
753; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
754; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
755; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
756; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
757; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
758; SSSE3-NEXT:    movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
759; SSSE3-NEXT:    movapd %xmm2, %xmm0
760; SSSE3-NEXT:    retq
761;
762; SSE41-LABEL: trunc2x4i64_8i16:
763; SSE41:       # BB#0: # %entry
764; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
765; SSE41-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
766; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
767; SSE41-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
768; SSE41-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
769; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
770; SSE41-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
771; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
772; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
773; SSE41-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
774; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
775; SSE41-NEXT:    retq
776;
777; AVX1-LABEL: trunc2x4i64_8i16:
778; AVX1:       # BB#0: # %entry
779; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
780; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
781; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
782; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
783; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
784; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
785; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
786; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
787; AVX1-NEXT:    vzeroupper
788; AVX1-NEXT:    retq
789;
790; AVX2-LABEL: trunc2x4i64_8i16:
791; AVX2:       # BB#0: # %entry
792; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
793; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
794; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
795; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
796; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
797; AVX2-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
798; AVX2-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
799; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
800; AVX2-NEXT:    vzeroupper
801; AVX2-NEXT:    retq
802;
803; AVX512F-LABEL: trunc2x4i64_8i16:
804; AVX512F:       # BB#0: # %entry
805; AVX512F-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
806; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
807; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
808; AVX512F-NEXT:    vpmovqd %zmm1, %ymm1
809; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
810; AVX512F-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
811; AVX512F-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
812; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
813; AVX512F-NEXT:    retq
814;
815; AVX512VL-LABEL: trunc2x4i64_8i16:
816; AVX512VL:       # BB#0: # %entry
817; AVX512VL-NEXT:    vpmovqd %ymm0, %xmm0
818; AVX512VL-NEXT:    vpmovqd %ymm1, %xmm1
819; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
820; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
821; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
822; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
823; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
824; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
825; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
826; AVX512VL-NEXT:    retq
827;
828; AVX512BW-LABEL: trunc2x4i64_8i16:
829; AVX512BW:       # BB#0: # %entry
830; AVX512BW-NEXT:    # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
831; AVX512BW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
832; AVX512BW-NEXT:    vpmovqd %zmm0, %ymm0
833; AVX512BW-NEXT:    vpmovqd %zmm1, %ymm1
834; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
835; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
836; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
837; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
838; AVX512BW-NEXT:    retq
839;
840; AVX512BWVL-LABEL: trunc2x4i64_8i16:
841; AVX512BWVL:       # BB#0: # %entry
842; AVX512BWVL-NEXT:    vpmovqd %ymm0, %xmm0
843; AVX512BWVL-NEXT:    vpmovqd %ymm1, %xmm1
844; AVX512BWVL-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
845; AVX512BWVL-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
846; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
847; AVX512BWVL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
848; AVX512BWVL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
849; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
850; AVX512BWVL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
851; AVX512BWVL-NEXT:    retq
852entry:
853  %0 = trunc <4 x i64> %a to <4 x i16>
854  %1 = trunc <4 x i64> %b to <4 x i16>
855  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
856  ret <8 x i16> %2
857}
858
; Truncate two <2 x i64> inputs to <2 x i32> each and concatenate into a
; <4 x i32>. All targets lower this to a single shufps of the even dword
; lanes. Check lines are autogenerated -- regenerate, do not hand-edit.
define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE-LABEL: trunc2x2i64_4i32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2x2i64_4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT:    retq
;
; AVX512-LABEL: trunc2x2i64_4i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX512-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}
880
; Truncate <2 x i64> to <2 x i32> and bitcast the result to a scalar i64.
; Most targets shuffle the even dwords together and do a vector-to-GPR move;
; the VL targets instead spill via vpmovqd to the stack and reload (a known
; lowering difference captured by these autogenerated checks -- regenerate,
; do not hand-edit).
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc2i64_i64:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2i64_i64:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2i64_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2i64_i64:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovqd %xmm0, -{{[0-9]+}}(%rsp)
; AVX512BWVL-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}
922
; Truncate two <4 x i32> inputs to <4 x i16> each and concatenate into a
; <8 x i16>. SSSE3+ targets use a shared pshufb mask; SSE2 and the VL
; targets use pshuflw/pshufhw/pshufd sequences instead. Check lines are
; autogenerated -- regenerate, do not hand-edit.
define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc2x4i32_8i16:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512F-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x4i32_8i16:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x4i32_8i16:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512BWVL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BWVL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}
1002
1003; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <4 x i32> to <4 x i16> and bitcast to a scalar i64 (regression
; test for PR15524, see comment above). VL targets lower via vpmovdw through
; a stack slot; the others shuffle in-register and move to rax. Check lines
; are autogenerated -- regenerate, do not hand-edit.
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc4i32_i64:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc4i32_i64:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc4i32_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc4i32_i64:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovdw %xmm0, -{{[0-9]+}}(%rsp)
; AVX512BWVL-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}
1059
; Truncate two <8 x i16> inputs to <8 x i8> each and concatenate into a
; <16 x i8>. SSE2 masks with pand then packuswb; SSSE3+ use a shared pshufb
; byte-select mask. Check lines are autogenerated -- regenerate, do not
; hand-edit.
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm2, %xmm1
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT:    pshufb %xmm2, %xmm1
; SSSE3-NEXT:    pshufb %xmm2, %xmm0
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pshufb %xmm2, %xmm1
; SSE41-NEXT:    pshufb %xmm2, %xmm0
; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc2x8i16_16i8:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512F-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512F-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc2x8i16_16i8:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512VL-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc2x8i16_16i8:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc2x8i16_16i8:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vmovdqu {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX512BWVL-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
; AVX512BWVL-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}
1130
1131; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; Truncate <8 x i16> to <8 x i8> and bitcast to a scalar i64 (regression
; test for PR15524, see comment above). BWVL lowers via vpmovwb through a
; stack slot; the other targets pack/shuffle in-register and move to rax.
; Check lines are autogenerated -- regenerate, do not hand-edit.
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2:       # BB#0: # %entry
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    packuswb %xmm0, %xmm0
; SSE2-NEXT:    movd %xmm0, %rax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3:       # BB#0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    movd %xmm0, %rax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41:       # BB#0: # %entry
; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT:    movd %xmm0, %rax
; SSE41-NEXT:    retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc8i16_i64:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512F-NEXT:    vmovq %xmm0, %rax
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc8i16_i64:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT:    vmovq %xmm0, %rax
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc8i16_i64:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT:    vmovq %xmm0, %rax
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc8i16_i64:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpmovwb %xmm0, -{{[0-9]+}}(%rsp)
; AVX512BWVL-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX512BWVL-NEXT:    retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}
1186
; Truncate+shuffle of a zeroinitializer constant: the whole computation must
; constant-fold to zero, so every target should emit just a register-zeroing
; idiom (xorps/vxorps/vpxor) and ret. Check lines are autogenerated --
; regenerate, do not hand-edit.
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: trunc16i64_16i8_const:
; AVX512F:       # BB#0: # %entry
; AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: trunc16i64_16i8_const:
; AVX512VL:       # BB#0: # %entry
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: trunc16i64_16i8_const:
; AVX512BW:       # BB#0: # %entry
; AVX512BW-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512BWVL-LABEL: trunc16i64_16i8_const:
; AVX512BWVL:       # BB#0: # %entry
; AVX512BWVL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512BWVL-NEXT:    retq

entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}
1223
1224