; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
3
4
; i8 elements (both shuffle operands are function arguments)
6
; Result bytes 0-1 come from %a (bytes 0-1) and bytes 2-15 from %b, i.e. the
; low 16-bit chunk of %a is inserted into %b at halfword lane 0.
; The assertions expect a copy of v1 followed by a single .h lane move.
define <16 x i8> @insert_v16i8_2_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[0], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
16
; Result bytes 2-3 come from %a (bytes 0-1); every other byte keeps the value
; of the same position in %b, i.e. an insert at halfword lane 1.
; The assertions expect a copy of v1 followed by a single .h lane move.
define <16 x i8> @insert_v16i8_2_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[1], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
26
; Result bytes 12-13 come from %a (bytes 0-1); every other byte keeps the value
; of the same position in %b, i.e. an insert at halfword lane 6.
; The assertions expect a copy of v1 followed by a single .h lane move.
define <16 x i8> @insert_v16i8_2_6(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[6], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 30, i32 31>
  ret <16 x i8> %s2
}
36
; Result bytes 0-3 come from %a (bytes 0-3) and bytes 4-15 from %b, i.e. the
; low 32-bit chunk of %a is inserted into %b at word lane 0.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
46
; Result bytes 2-5 come from %a (bytes 0-3); the remaining bytes come from the
; same positions in %b. The 4-byte chunk lands at byte offset 2, which is not
; a multiple of its size, so it does not line up with a .s lane.
; The assertions expect a two-register tbl driven by a constant-pool mask.
define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
59
; Result bytes 4-7 come from %a (bytes 0-3); every other byte keeps the value
; of the same position in %b, i.e. an insert at word lane 1.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <16 x i8> @insert_v16i8_4_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
69
; Result bytes 8-11 come from %a (bytes 0-3); every other byte keeps the value
; of the same position in %b, i.e. an insert at word lane 2.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <16 x i8> @insert_v16i8_4_3(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
79
; Result bytes 12-15 come from %a (bytes 0-3); bytes 0-11 come from %b,
; i.e. an insert at word lane 3.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <16 x i8> @insert_v16i8_4_4(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}
89
; 64-bit vector case: result bytes 0-3 come from %a (bytes 0-3) and bytes 4-7
; from %b (bytes 4-7).
; The assertions expect a copy of d2 followed by a .s[1] lane move from v1.
define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}
101
; 64-bit vector case: result bytes 0-3 come from %b (bytes 0-3) and bytes 4-7
; from %a (bytes 0-3).
; The assertions expect a copy of d1 followed by a .s[1] lane move from v2.
define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}
113
; Result bytes 0-7 come from %a (bytes 0-7) and bytes 8-15 from %b
; (bytes 8-15).
; The assertions expect a copy of v2 followed by a .d[1] lane move from v1.
define <16 x i8> @insert_v16i8_8_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
123
; Result bytes 0-7 come from %b (bytes 0-7) and bytes 8-15 from %a
; (bytes 0-7), i.e. the low half of %a is inserted at doubleword lane 1.
; The assertions expect a copy of v1 followed by a single .d lane move.
define <16 x i8> @insert_v16i8_8_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}
133
; i16 elements (both shuffle operands are function arguments)
135
; Result elements 0-1 come from %a (elements 0-1) and elements 2-7 from %b,
; i.e. a two-halfword chunk of %a is inserted into %b at word lane 0.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
145
; Result elements 1-2 come from %a (elements 0-1); the remaining elements come
; from the same positions in %b. The two-element chunk starts at element 1,
; so it does not line up with a .s lane.
; The assertions expect a two-register tbl driven by a constant-pool mask.
define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
158
; Result elements 2-3 come from %a (elements 0-1); every other element keeps
; the value of the same position in %b, i.e. an insert at word lane 1.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <8 x i16> @insert_v8i16_2_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
168
; Result elements 4-5 come from %a (elements 0-1); every other element keeps
; the value of the same position in %b, i.e. an insert at word lane 2.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <8 x i16> @insert_v8i16_2_3(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}
178
; Result elements 6-7 come from %a (elements 0-1); elements 0-5 come from %b,
; i.e. an insert at word lane 3.
; The assertions expect a copy of v1 followed by a single .s lane move.
define <8 x i16> @insert_v8i16_2_4(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}
188
; 64-bit vector case: result elements 0-1 come from %a (elements 0-1) and
; elements 2-3 from %b (elements 2-3).
; The assertions expect a copy of d2 followed by a .s[1] lane move from v1.
define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}
200
; 64-bit vector case: result elements 0-1 come from %b (elements 0-1) and
; elements 2-3 from %a (elements 0-1).
; The assertions expect a copy of d1 followed by a .s[1] lane move from v2.
define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}
212
; Result elements 0-3 come from %a (elements 0-3) and elements 4-7 from %b
; (elements 4-7).
; The assertions expect a copy of v2 followed by a .d[1] lane move from v1.
define <8 x i16> @insert_v8i16_4_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
222
; Result elements 0-3 come from %b (elements 0-3) and elements 4-7 from %a
; (elements 0-3), i.e. the low half of %a is inserted at doubleword lane 1.
; The assertions expect a copy of v1 followed by a single .d lane move.
define <8 x i16> @insert_v8i16_4_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}
232
; i32 elements (both shuffle operands are function arguments)
234
; Result elements 0-1 come from %a (elements 0-1) and elements 2-3 from %b
; (elements 2-3).
; The assertions expect a copy of v2 followed by a .d[1] lane move from v1.
define <4 x i32> @insert_v4i32_2_1(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}
244
; Result elements 0-1 come from %b (elements 0-1) and elements 2-3 from %a
; (elements 0-1), i.e. the low half of %a is inserted at doubleword lane 1.
; The assertions expect a copy of v1 followed by a single .d lane move.
define <4 x i32> @insert_v4i32_2_2(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}
254
255
256
257
; i8 elements (inserted subvector is loaded from memory)
259
; Loads a <4 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its four bytes in result bytes 0-3; bytes 4-15 come
; from %b.
; The assertions expect a 32-bit load into s1 and a single .s[0] lane move.
define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
272
; Loads a <4 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its four bytes in result bytes 2-5; the remaining
; bytes come from the same positions in %b. Byte offset 2 does not line up
; with a .s lane.
; The assertions expect a two-register tbl driven by a constant-pool mask.
define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI24_0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
287
; Loads a <4 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its four bytes in result bytes 4-7; every other byte
; keeps the value of the same position in %b.
; The assertions expect a 32-bit load into s1 and a single .s[1] lane move.
define <16 x i8> @load_v16i8_4_2(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
300
; Loads a <4 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its four bytes in result bytes 8-11; every other
; byte keeps the value of the same position in %b.
; The assertions expect a 32-bit load into s1 and a single .s[2] lane move.
define <16 x i8> @load_v16i8_4_3(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[2], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
313
; Loads a <4 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its four bytes in result bytes 12-15; bytes 0-11
; come from %b.
; The assertions expect a 32-bit load into s1 and a single .s[3] lane move.
define <16 x i8> @load_v16i8_4_4(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}
326
; 64-bit vector case: loads a <4 x i8> through %a, widens it to <8 x i8> (the
; extra lanes are undefined), then keeps its four bytes in result bytes 0-3;
; bytes 4-7 come from %b (bytes 4-7).
; The recorded output widens and re-narrows the loaded value (ushll + uzp1)
; before the .s[1] lane move from v1.
define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}
342
; 64-bit vector case: loads a <4 x i8> through %a, widens it to <8 x i8> (the
; extra lanes are undefined), then places its four bytes in result bytes 4-7;
; bytes 0-3 come from %b (bytes 0-3).
; The recorded output widens and re-narrows the loaded value (ushll + uzp1)
; before the .s[1] lane move into the copy of d1.
define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    uzp1 v2.8b, v0.8b, v0.8b
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}
358
; Loads an <8 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then keeps its eight bytes in result bytes 0-7; bytes 8-15 come
; from %b (bytes 8-15).
; The assertions expect an ext by 8 bytes on v1, a 64-bit load into d0 and a
; .d[1] lane move.
define <16 x i8> @load_v16i8_8_1(float %tmp, <16 x i8> %b, <8 x i8> *%a) {
; CHECK-LABEL: load_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, <8 x i8> *%a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}
371
; Loads an <8 x i8> through %a, widens it to <16 x i8> (the extra lanes are
; undefined), then places its eight bytes in result bytes 8-15; bytes 0-7 come
; from %b (bytes 0-7).
; The assertions expect a 64-bit load into d1 and a single .d[1] lane move.
define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, <8 x i8> *%a) {
; CHECK-LABEL: load_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, <8 x i8> *%a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}
384
; i16 elements (inserted subvector is loaded from memory)
386
; Loads a <2 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then keeps its two elements in result elements 0-1; elements
; 2-7 come from %b.
; The recorded output assembles the loaded pair element by element
; (ldrh + ld1 lane) and narrows it with xtn before the .s[0] lane move.
define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
403
; Loads a <2 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then places its two elements in result elements 1-2; the
; remaining elements come from the same positions in %b. Starting at
; element 1 does not line up with a .s lane.
; The recorded output assembles the loaded pair (ldrh + ld1 lane + xtn) and
; then uses a two-register tbl driven by a constant-pool mask.
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    adrp x8, .LCPI33_0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI33_0]
; CHECK-NEXT:    xtn v0.4h, v2.4s
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v3.16b
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
422
; Loads a <2 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then places its two elements in result elements 2-3; every
; other element keeps the value of the same position in %b.
; The recorded output assembles the loaded pair element by element
; (ldrh + ld1 lane) and narrows it with xtn before the .s[1] lane move.
define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
439
; Loads a <2 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then places its two elements in result elements 4-5; every
; other element keeps the value of the same position in %b.
; The recorded output assembles the loaded pair element by element
; (ldrh + ld1 lane) and narrows it with xtn before the .s[2] lane move.
define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[2], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}
456
; Loads a <2 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then places its two elements in result elements 6-7; elements
; 0-5 come from %b.
; The recorded output assembles the loaded pair element by element
; (ldrh + ld1 lane) and narrows it with xtn before the .s[3] lane move.
define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}
473
; 64-bit vector case: loads a <2 x i16> through %a, widens it to <4 x i16>
; (the extra lanes are undefined), then keeps its two elements in result
; elements 0-1; elements 2-3 come from %b (elements 2-3).
; The recorded output loads the pair with two ld1 lane loads and compacts it
; with uzp1 before the .s[1] lane move from v1.
define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}
490
; 64-bit vector case: loads a <2 x i16> through %a, widens it to <4 x i16>
; (the extra lanes are undefined), then places its two elements in result
; elements 2-3; elements 0-1 come from %b (elements 0-1).
; The recorded output loads the pair with two ld1 lane loads and compacts it
; with uzp1 before the .s[1] lane move into the copy of d1.
define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v0.4h
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}
507
; Loads a <4 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then keeps its four elements in result elements 0-3; elements
; 4-7 come from %b (elements 4-7).
; The assertions expect an ext by 8 bytes on v1, a 64-bit load into d0 and a
; .d[1] lane move.
define <8 x i16> @load_v8i16_4_1(float %tmp, <8 x i16> %b, <4 x i16> *%a) {
; CHECK-LABEL: load_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, <4 x i16> *%a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}
520
; Loads a <4 x i16> through %a, widens it to <8 x i16> (the extra lanes are
; undefined), then places its four elements in result elements 4-7; elements
; 0-3 come from %b (elements 0-3).
; The assertions expect a 64-bit load into d1 and a single .d[1] lane move.
define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, <4 x i16> *%a) {
; CHECK-LABEL: load_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, <4 x i16> *%a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}
533
; i32 elements (inserted subvector is loaded from memory)
535
; Loads a <2 x i32> through %a, widens it to <4 x i32> (the extra lanes are
; undefined), then keeps its two elements in result elements 0-1; elements
; 2-3 come from %b (elements 2-3).
; The assertions expect an ext by 8 bytes on v1, a 64-bit load into d0 and a
; .d[1] lane move.
define <4 x i32> @load_v4i32_2_1(float %tmp, <4 x i32> %b, <2 x i32> *%a) {
; CHECK-LABEL: load_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, <2 x i32> *%a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}
548
; Loads a <2 x i32> through %a, widens it to <4 x i32> (the extra lanes are
; undefined), then places its two elements in result elements 2-3; elements
; 0-1 come from %b (elements 0-1).
; The assertions expect a 64-bit load into d1 and a single .d[1] lane move.
define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, <2 x i32> *%a) {
; CHECK-LABEL: load_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, <2 x i32> *%a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}
561