1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
5; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
6; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
7; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
8
9;
10; vXi8
11;
12
13define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
14; SSE2-LABEL: @loadext_2i8_to_2i64(
15; SSE2-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
16; SSE2-NEXT:    [[I0:%.*]] = load i8, i8* [[P0]], align 1
17; SSE2-NEXT:    [[I1:%.*]] = load i8, i8* [[P1]], align 1
18; SSE2-NEXT:    [[X0:%.*]] = sext i8 [[I0]] to i64
19; SSE2-NEXT:    [[X1:%.*]] = sext i8 [[I1]] to i64
20; SSE2-NEXT:    [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
21; SSE2-NEXT:    [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
22; SSE2-NEXT:    ret <2 x i64> [[V1]]
23;
24; SLM-LABEL: @loadext_2i8_to_2i64(
25; SLM-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
26; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
27; SLM-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
28; SLM-NEXT:    ret <2 x i64> [[TMP3]]
29;
30; AVX-LABEL: @loadext_2i8_to_2i64(
31; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <2 x i8>*
32; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
33; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
34; AVX-NEXT:    ret <2 x i64> [[TMP3]]
35;
36  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
37  %i0 = load i8, i8* %p0, align 1
38  %i1 = load i8, i8* %p1, align 1
39  %x0 = sext i8 %i0 to i64
40  %x1 = sext i8 %i1 to i64
41  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
42  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
43  ret <2 x i64> %v1
44}
45
; 4 consecutive i8 loads, each sign-extended to i32 and inserted into a
; <4 x i32>. The checks show this vectorizes to one <4 x i8> load + vector
; sext on both SSE and AVX targets.
define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) {
; SSE-LABEL: @loadext_4i8_to_4i32(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; SSE-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}
76
; 4 consecutive i8 loads sign-extended to i64 (a wider 4x extension than the
; i32 case above). Vectorized to a <4 x i8> load + sext to <4 x i64> on both
; SSE and AVX targets.
define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) {
; SSE-LABEL: @loadext_4i8_to_4i64(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <4 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %x0 = sext i8 %i0 to i64
  %x1 = sext i8 %i1 to i64
  %x2 = sext i8 %i2 to i64
  %x3 = sext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}
107
; 8 consecutive i8 loads sign-extended to i16. Vectorized to one <8 x i8>
; load + sext to <8 x i16> on both SSE and AVX targets.
define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) {
; SSE-LABEL: @loadext_8i8_to_8i16(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; SSE-NEXT:    ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT:    ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i16
  %x1 = sext i8 %i1 to i16
  %x2 = sext i8 %i2 to i16
  %x3 = sext i8 %i3 to i16
  %x4 = sext i8 %i4 to i16
  %x5 = sext i8 %i5 to i16
  %x6 = sext i8 %i6 to i16
  %x7 = sext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16>   %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16>   %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16>   %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16>   %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16>   %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16>   %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16>   %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}
154
; 8 consecutive i8 loads sign-extended to i32 (result spans a 256-bit
; <8 x i32>). Vectorized to one <8 x i8> load + vector sext on both SSE and
; AVX targets.
define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) {
; SSE-LABEL: @loadext_8i8_to_8i32(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; SSE-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <8 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, i8* %p0, i64 1
  %p2 = getelementptr inbounds i8, i8* %p0, i64 2
  %p3 = getelementptr inbounds i8, i8* %p0, i64 3
  %p4 = getelementptr inbounds i8, i8* %p0, i64 4
  %p5 = getelementptr inbounds i8, i8* %p0, i64 5
  %p6 = getelementptr inbounds i8, i8* %p0, i64 6
  %p7 = getelementptr inbounds i8, i8* %p0, i64 7
  %i0 = load i8, i8* %p0, align 1
  %i1 = load i8, i8* %p1, align 1
  %i2 = load i8, i8* %p2, align 1
  %i3 = load i8, i8* %p3, align 1
  %i4 = load i8, i8* %p4, align 1
  %i5 = load i8, i8* %p5, align 1
  %i6 = load i8, i8* %p6, align 1
  %i7 = load i8, i8* %p7, align 1
  %x0 = sext i8 %i0 to i32
  %x1 = sext i8 %i1 to i32
  %x2 = sext i8 %i2 to i32
  %x3 = sext i8 %i3 to i32
  %x4 = sext i8 %i4 to i32
  %x5 = sext i8 %i5 to i32
  %x6 = sext i8 %i6 to i32
  %x7 = sext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}
201
; Full 128-bit case: 16 consecutive i8 loads sign-extended to i16.
; Vectorized to one <16 x i8> load + sext to <16 x i16> on both SSE and AVX
; targets.
define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) {
; SSE-LABEL: @loadext_16i8_to_16i16(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; SSE-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i8* [[P0:%.*]] to <16 x i8>*
; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %p1  = getelementptr inbounds i8, i8* %p0, i64 1
  %p2  = getelementptr inbounds i8, i8* %p0, i64 2
  %p3  = getelementptr inbounds i8, i8* %p0, i64 3
  %p4  = getelementptr inbounds i8, i8* %p0, i64 4
  %p5  = getelementptr inbounds i8, i8* %p0, i64 5
  %p6  = getelementptr inbounds i8, i8* %p0, i64 6
  %p7  = getelementptr inbounds i8, i8* %p0, i64 7
  %p8  = getelementptr inbounds i8, i8* %p0, i64 8
  %p9  = getelementptr inbounds i8, i8* %p0, i64 9
  %p10 = getelementptr inbounds i8, i8* %p0, i64 10
  %p11 = getelementptr inbounds i8, i8* %p0, i64 11
  %p12 = getelementptr inbounds i8, i8* %p0, i64 12
  %p13 = getelementptr inbounds i8, i8* %p0, i64 13
  %p14 = getelementptr inbounds i8, i8* %p0, i64 14
  %p15 = getelementptr inbounds i8, i8* %p0, i64 15
  %i0  = load i8, i8* %p0,  align 1
  %i1  = load i8, i8* %p1,  align 1
  %i2  = load i8, i8* %p2,  align 1
  %i3  = load i8, i8* %p3,  align 1
  %i4  = load i8, i8* %p4,  align 1
  %i5  = load i8, i8* %p5,  align 1
  %i6  = load i8, i8* %p6,  align 1
  %i7  = load i8, i8* %p7,  align 1
  %i8  = load i8, i8* %p8,  align 1
  %i9  = load i8, i8* %p9,  align 1
  %i10 = load i8, i8* %p10, align 1
  %i11 = load i8, i8* %p11, align 1
  %i12 = load i8, i8* %p12, align 1
  %i13 = load i8, i8* %p13, align 1
  %i14 = load i8, i8* %p14, align 1
  %i15 = load i8, i8* %p15, align 1
  %x0  = sext i8 %i0  to i16
  %x1  = sext i8 %i1  to i16
  %x2  = sext i8 %i2  to i16
  %x3  = sext i8 %i3  to i16
  %x4  = sext i8 %i4  to i16
  %x5  = sext i8 %i5  to i16
  %x6  = sext i8 %i6  to i16
  %x7  = sext i8 %i7  to i16
  %x8  = sext i8 %i8  to i16
  %x9  = sext i8 %i9  to i16
  %x10 = sext i8 %i10 to i16
  %x11 = sext i8 %i11 to i16
  %x12 = sext i8 %i12 to i16
  %x13 = sext i8 %i13 to i16
  %x14 = sext i8 %i14 to i16
  %x15 = sext i8 %i15 to i16
  %v0  = insertelement <16 x i16> undef, i16 %x0,  i32 0
  %v1  = insertelement <16 x i16>  %v0,  i16 %x1,  i32 1
  %v2  = insertelement <16 x i16>  %v1,  i16 %x2,  i32 2
  %v3  = insertelement <16 x i16>  %v2,  i16 %x3,  i32 3
  %v4  = insertelement <16 x i16>  %v3,  i16 %x4,  i32 4
  %v5  = insertelement <16 x i16>  %v4,  i16 %x5,  i32 5
  %v6  = insertelement <16 x i16>  %v5,  i16 %x6,  i32 6
  %v7  = insertelement <16 x i16>  %v6,  i16 %x7,  i32 7
  %v8  = insertelement <16 x i16>  %v7,  i16 %x8,  i32 8
  %v9  = insertelement <16 x i16>  %v8,  i16 %x9,  i32 9
  %v10 = insertelement <16 x i16>  %v9,  i16 %x10, i32 10
  %v11 = insertelement <16 x i16>  %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16>  %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16>  %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16>  %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16>  %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}
280
281;
282; vXi16
283;
284
; 2 consecutive i16 loads sign-extended to i64. Unlike the 2xi8 case above,
; this vectorizes on all targets (SSE and AVX) to a <2 x i16> load + sext.
define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) {
; SSE-LABEL: @loadext_2i16_to_2i64(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SSE-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <2 x i16>*
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}
307
; 4 consecutive i16 loads sign-extended to i32. Vectorized to one <4 x i16>
; load + vector sext on both SSE and AVX targets.
define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
; SSE-LABEL: @loadext_4i16_to_4i32(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; SSE-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32>   %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}
338
; 4 consecutive i16 loads sign-extended to i64 (256-bit result). Vectorized
; to one <4 x i16> load + sext to <4 x i64> on both SSE and AVX targets.
define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE-LABEL: @loadext_4i16_to_4i64(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <4 x i16>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %x0 = sext i16 %i0 to i64
  %x1 = sext i16 %i1 to i64
  %x2 = sext i16 %i2 to i64
  %x3 = sext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}
369
; Full 128-bit case: 8 consecutive i16 loads sign-extended to i32.
; Vectorized to one <8 x i16> load + sext to <8 x i32> on both SSE and AVX
; targets.
define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) {
; SSE-LABEL: @loadext_8i16_to_8i32(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; SSE-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
; SSE-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0:%.*]] to <8 x i16>*
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, i16* %p0, i64 1
  %p2 = getelementptr inbounds i16, i16* %p0, i64 2
  %p3 = getelementptr inbounds i16, i16* %p0, i64 3
  %p4 = getelementptr inbounds i16, i16* %p0, i64 4
  %p5 = getelementptr inbounds i16, i16* %p0, i64 5
  %p6 = getelementptr inbounds i16, i16* %p0, i64 6
  %p7 = getelementptr inbounds i16, i16* %p0, i64 7
  %i0 = load i16, i16* %p0, align 1
  %i1 = load i16, i16* %p1, align 1
  %i2 = load i16, i16* %p2, align 1
  %i3 = load i16, i16* %p3, align 1
  %i4 = load i16, i16* %p4, align 1
  %i5 = load i16, i16* %p5, align 1
  %i6 = load i16, i16* %p6, align 1
  %i7 = load i16, i16* %p7, align 1
  %x0 = sext i16 %i0 to i32
  %x1 = sext i16 %i1 to i32
  %x2 = sext i16 %i2 to i32
  %x3 = sext i16 %i3 to i32
  %x4 = sext i16 %i4 to i32
  %x5 = sext i16 %i5 to i32
  %x6 = sext i16 %i6 to i32
  %x7 = sext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32>   %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32>   %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32>   %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32>   %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32>   %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32>   %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32>   %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}
416
417;
418; vXi32
419;
420
; 2 consecutive i32 loads sign-extended to i64. Vectorized to one <2 x i32>
; load + sext to <2 x i64> on both SSE and AVX targets.
define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) {
; SSE-LABEL: @loadext_2i32_to_2i64(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; SSE-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <2 x i32>*
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64>   %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}
443
; 4 consecutive i32 loads sign-extended to i64 (256-bit result). Vectorized
; to one <4 x i32> load + sext to <4 x i64> on both SSE and AVX targets.
define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) {
; SSE-LABEL: @loadext_4i32_to_4i64(
; SSE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; SSE-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; SSE-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT:    [[TMP1:%.*]] = bitcast i32* [[P0:%.*]] to <4 x i32>*
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, i32* %p0, i64 1
  %p2 = getelementptr inbounds i32, i32* %p0, i64 2
  %p3 = getelementptr inbounds i32, i32* %p0, i64 3
  %i0 = load i32, i32* %p0, align 1
  %i1 = load i32, i32* %p1, align 1
  %i2 = load i32, i32* %p2, align 1
  %i3 = load i32, i32* %p3, align 1
  %x0 = sext i32 %i0 to i64
  %x1 = sext i32 %i1 to i64
  %x2 = sext i32 %i2 to i64
  %x3 = sext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64>   %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64>   %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64>   %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}
474