; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck --check-prefixes=CHECK,CHECK-LE %s
; RUN: llc -mtriple=armeb-eabi -mattr=+neon,+fullfp16 %s -o - | FileCheck --check-prefixes=CHECK,CHECK-BE %s

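; A minimal sketch of how these assertions can be refreshed after a codegen
; change, assuming a built llc and that this file lives in the usual llvm/test
; tree (both paths below are illustrative, not the actual locations):
;   llvm/utils/update_llc_test_checks.py --llc-binary <build>/bin/llc <path-to-this-test>.ll
; The script reruns the RUN lines above and rewrites the CHECK lines in place.
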
define arm_aapcs_vfpcc <8 x i8> @v_movi8() nounwind {
; CHECK-LABEL: v_movi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d0, #0x8
; CHECK-NEXT:    mov pc, lr
	ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}

define arm_aapcs_vfpcc <4 x i16> @v_movi16a() nounwind {
; CHECK-LABEL: v_movi16a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d0, #0x10
; CHECK-NEXT:    mov pc, lr
	ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
}

define arm_aapcs_vfpcc <4 x i16> @v_movi16b() nounwind {
; CHECK-LABEL: v_movi16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d0, #0x1000
; CHECK-NEXT:    mov pc, lr
	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
}

define arm_aapcs_vfpcc <4 x i16> @v_mvni16a() nounwind {
; CHECK-LABEL: v_mvni16a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d0, #0x10
; CHECK-NEXT:    mov pc, lr
	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 >
}

define arm_aapcs_vfpcc <4 x i16> @v_mvni16b() nounwind {
; CHECK-LABEL: v_mvni16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d0, #0x1000
; CHECK-NEXT:    mov pc, lr
	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32a() nounwind {
; CHECK-LABEL: v_movi32a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x20
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 32, i32 32 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32b() nounwind {
; CHECK-LABEL: v_movi32b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x2000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 8192, i32 8192 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32c() nounwind {
; CHECK-LABEL: v_movi32c:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x200000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 2097152, i32 2097152 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32d() nounwind {
; CHECK-LABEL: v_movi32d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x20000000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 536870912, i32 536870912 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32e() nounwind {
; CHECK-LABEL: v_movi32e:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x20ff
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 8447, i32 8447 >
}

define arm_aapcs_vfpcc <2 x i32> @v_movi32f() nounwind {
; CHECK-LABEL: v_movi32f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d0, #0x20ffff
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 2162687, i32 2162687 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32a() nounwind {
; CHECK-LABEL: v_mvni32a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x20
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 4294967263, i32 4294967263 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32b() nounwind {
; CHECK-LABEL: v_mvni32b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x2000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 4294959103, i32 4294959103 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32c() nounwind {
; CHECK-LABEL: v_mvni32c:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x200000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 4292870143, i32 4292870143 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32d() nounwind {
; CHECK-LABEL: v_mvni32d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x20000000
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 3758096383, i32 3758096383 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32e() nounwind {
; CHECK-LABEL: v_mvni32e:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x20ff
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 4294958848, i32 4294958848 >
}

define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind {
; CHECK-LABEL: v_mvni32f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d0, #0x20ffff
; CHECK-NEXT:    mov pc, lr
	ret <2 x i32> < i32 4292804608, i32 4292804608 >
}

define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind {
; CHECK-LABEL: v_movi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 d0, #0xff0000ff0000ffff
; CHECK-NEXT:    mov pc, lr
	ret <1 x i64> < i64 18374687574888349695 >
}

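; The same immediate splats as above, materialized into 128-bit Q registers.
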
define arm_aapcs_vfpcc <16 x i8> @v_movQi8() nounwind {
; CHECK-LABEL: v_movQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q0, #0x8
; CHECK-NEXT:    mov pc, lr
	ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
}

define arm_aapcs_vfpcc <8 x i16> @v_movQi16a() nounwind {
; CHECK-LABEL: v_movQi16a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q0, #0x10
; CHECK-NEXT:    mov pc, lr
	ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
}

define arm_aapcs_vfpcc <8 x i16> @v_movQi16b() nounwind {
; CHECK-LABEL: v_movQi16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q0, #0x1000
; CHECK-NEXT:    mov pc, lr
	ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32a() nounwind {
; CHECK-LABEL: v_movQi32a:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x20
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32b() nounwind {
; CHECK-LABEL: v_movQi32b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x2000
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32c() nounwind {
; CHECK-LABEL: v_movQi32c:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x200000
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32d() nounwind {
; CHECK-LABEL: v_movQi32d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x20000000
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32e() nounwind {
; CHECK-LABEL: v_movQi32e:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x20ff
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}

define arm_aapcs_vfpcc <4 x i32> @v_movQi32f() nounwind {
; CHECK-LABEL: v_movQi32f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q0, #0x20ffff
; CHECK-NEXT:    mov pc, lr
	ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}

define arm_aapcs_vfpcc <2 x i64> @v_movQi64() nounwind {
; CHECK-LABEL: v_movQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q0, #0xff0000ff0000ffff
; CHECK-NEXT:    mov pc, lr
	ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}

; Check for correct assembler printing for immediate values.
%struct.int8x8_t = type { <8 x i8> }
define arm_aapcs_vfpcc void @vdupn128(%struct.int8x8_t* noalias nocapture sret(%struct.int8x8_t) %agg.result) nounwind {
; CHECK-LABEL: vdupn128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i8 d16, #0x80
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    mov pc, lr
entry:
  %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
  store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
  ret void
}

define arm_aapcs_vfpcc void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret(%struct.int8x8_t) %agg.result) nounwind {
; CHECK-LABEL: vdupnneg75:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i8 d16, #0xb5
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    mov pc, lr
entry:
  %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
  store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
  ret void
}
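
; For reference: as unsigned bytes, -128 is 0x80 and -75 is 256 - 75 = 181 = 0xb5,
; which is why the vmov.i8 immediates above are printed in hex rather than as
; negative decimal values.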

define arm_aapcs_vfpcc <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
; CHECK-LE-LABEL: vmovls8:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.8 {d16}, [r0:64]
; CHECK-LE-NEXT:    vmovl.s8 q0, d16
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovls8:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.8 {d16}, [r0:64]
; CHECK-BE-NEXT:    vmovl.s8 q8, d16
; CHECK-BE-NEXT:    vrev64.16 q0, q8
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
	ret <8 x i16> %tmp2
}

define arm_aapcs_vfpcc <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vmovls16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-LE-NEXT:    vmovl.s16 q0, d16
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovls16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-BE-NEXT:    vmovl.s16 q8, d16
; CHECK-BE-NEXT:    vrev64.32 q0, q8
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
	ret <4 x i32> %tmp2
}

define arm_aapcs_vfpcc <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vmovls32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16}, [r0:64]
; CHECK-NEXT:    vmovl.s32 q0, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
	ret <2 x i64> %tmp2
}

define arm_aapcs_vfpcc <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
; CHECK-LE-LABEL: vmovlu8:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.8 {d16}, [r0:64]
; CHECK-LE-NEXT:    vmovl.u8 q0, d16
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovlu8:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.8 {d16}, [r0:64]
; CHECK-BE-NEXT:    vmovl.u8 q8, d16
; CHECK-BE-NEXT:    vrev64.16 q0, q8
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i8>, <8 x i8>* %A
	%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
	ret <8 x i16> %tmp2
}

define arm_aapcs_vfpcc <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vmovlu16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-LE-NEXT:    vmovl.u16 q0, d16
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovlu16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-BE-NEXT:    vmovl.u16 q8, d16
; CHECK-BE-NEXT:    vrev64.32 q0, q8
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i16>, <4 x i16>* %A
	%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
	ret <4 x i32> %tmp2
}

define arm_aapcs_vfpcc <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vmovlu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16}, [r0:64]
; CHECK-NEXT:    vmovl.u32 q0, d16
; CHECK-NEXT:    mov pc, lr
	%tmp1 = load <2 x i32>, <2 x i32>* %A
	%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
	ret <2 x i64> %tmp2
}

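; Plain truncations select vmovn; on big-endian targets the input and result
; are put back in lane order with vrev64 around the narrowing instruction.
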
define arm_aapcs_vfpcc <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vmovni16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vmovn.i16 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovni16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.16 q8, q8
; CHECK-BE-NEXT:    vmovn.i16 d16, q8
; CHECK-BE-NEXT:    vrev64.8 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
	ret <8 x i8> %tmp2
}

define arm_aapcs_vfpcc <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
; CHECK-LE-LABEL: vmovni32:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vmovn.i32 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovni32:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vmovn.i32 d16, q8
; CHECK-BE-NEXT:    vrev64.16 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
	ret <4 x i16> %tmp2
}

define arm_aapcs_vfpcc <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
; CHECK-LE-LABEL: vmovni64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vmovn.i64 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vmovni64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vmovn.i64 d16, q8
; CHECK-BE-NEXT:    vrev64.32 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
	ret <2 x i32> %tmp2
}

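; Saturating narrows lower to vqmovn.s/vqmovn.u and vqmovun.s via the
; llvm.arm.neon.vqmovn* intrinsics declared further below.
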
define arm_aapcs_vfpcc <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vqmovns16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.s16 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovns16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.16 q8, q8
; CHECK-BE-NEXT:    vqmovn.s16 d16, q8
; CHECK-BE-NEXT:    vrev64.8 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
	ret <8 x i8> %tmp2
}

define arm_aapcs_vfpcc <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
; CHECK-LE-LABEL: vqmovns32:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.s32 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovns32:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vqmovn.s32 d16, q8
; CHECK-BE-NEXT:    vrev64.16 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
	ret <4 x i16> %tmp2
}

define arm_aapcs_vfpcc <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
; CHECK-LE-LABEL: vqmovns64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.s64 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovns64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vqmovn.s64 d16, q8
; CHECK-BE-NEXT:    vrev64.32 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
	ret <2 x i32> %tmp2
}

define arm_aapcs_vfpcc <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vqmovnu16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.u16 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovnu16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.16 q8, q8
; CHECK-BE-NEXT:    vqmovn.u16 d16, q8
; CHECK-BE-NEXT:    vrev64.8 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
	ret <8 x i8> %tmp2
}

define arm_aapcs_vfpcc <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
; CHECK-LE-LABEL: vqmovnu32:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.u32 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovnu32:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vqmovn.u32 d16, q8
; CHECK-BE-NEXT:    vrev64.16 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
	ret <4 x i16> %tmp2
}

define arm_aapcs_vfpcc <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
; CHECK-LE-LABEL: vqmovnu64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovn.u64 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovnu64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vqmovn.u64 d16, q8
; CHECK-BE-NEXT:    vrev64.32 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
	ret <2 x i32> %tmp2
}

define arm_aapcs_vfpcc <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
; CHECK-LE-LABEL: vqmovuns16:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovun.s16 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovuns16:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.16 q8, q8
; CHECK-BE-NEXT:    vqmovun.s16 d16, q8
; CHECK-BE-NEXT:    vrev64.8 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <8 x i16>, <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
	ret <8 x i8> %tmp2
}

define arm_aapcs_vfpcc <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
; CHECK-LE-LABEL: vqmovuns32:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovun.s32 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovuns32:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vqmovun.s32 d16, q8
; CHECK-BE-NEXT:    vrev64.16 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <4 x i32>, <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
	ret <4 x i16> %tmp2
}

define arm_aapcs_vfpcc <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
; CHECK-LE-LABEL: vqmovuns64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    vqmovun.s64 d0, q8
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: vqmovuns64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vqmovun.s64 d16, q8
; CHECK-BE-NEXT:    vrev64.32 d0, d16
; CHECK-BE-NEXT:    mov pc, lr
	%tmp1 = load <2 x i64>, <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
	ret <2 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone

; Truncating vector stores are not supported.  The following should not crash.
; Radar 8598391.
define arm_aapcs_vfpcc void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
; CHECK-LE-LABEL: noTruncStore:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    vld1.64 {d16, d17}, [r0:128]
; CHECK-LE-NEXT:    vmovn.i32 d16, q8
; CHECK-LE-NEXT:    vstr d16, [r1]
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: noTruncStore:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0:128]
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vmovn.i32 d16, q8
; CHECK-BE-NEXT:    vrev64.16 d16, d16
; CHECK-BE-NEXT:    vstr d16, [r1]
; CHECK-BE-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %a, align 16
  %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
  store <4 x i16> %tmp2, <4 x i16>* %b, align 8
  ret void
}

; Use vmov.f32 to materialize f32 immediate splats
; rdar://10437054
define arm_aapcs_vfpcc void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
; CHECK-LABEL: v_mov_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 d16, #-1.600000e+01
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    mov pc, lr
entry:
  store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
  ret void
}

define arm_aapcs_vfpcc void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
; CHECK-LE-LABEL: v_mov_v4f32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.f32 q8, #3.100000e+01
; CHECK-LE-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: v_mov_v4f32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.f32 q8, #3.100000e+01
; CHECK-BE-NEXT:    vstmia r0, {d16, d17}
; CHECK-BE-NEXT:    mov pc, lr
entry:
  store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
  ret void
}

define arm_aapcs_vfpcc void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
; CHECK-LE-LABEL: v_mov_v4f32_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.f32 q8, #1.000000e+00
; CHECK-LE-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-LE-NEXT:    vadd.f32 q8, q9, q8
; CHECK-LE-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-LE-NEXT:    mov pc, lr
;
; CHECK-BE-LABEL: v_mov_v4f32_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    vmov.f32 q9, #1.000000e+00
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vadd.f32 q8, q8, q9
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-BE-NEXT:    mov pc, lr
entry:
  %a = load <4 x float> , <4 x float> *%p
  %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
  store <4 x float> %b, <4 x float> *%p
  ret void
}

; Vector any_extends must be selected as either vmovl.u or vmovl.s.
; rdar://10723651
define arm_aapcs_vfpcc void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
; CHECK-LE-LABEL: any_extend:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.i16 d16, #0x1
; CHECK-LE-NEXT:    vand d16, d0, d16
; CHECK-LE-NEXT:    vmovl.u16 q8, d16
; CHECK-LE-NEXT:    vsub.i32 q8, q8, q1
; CHECK-LE-NEXT:    vmovn.i32 d16, q8
; CHECK-LE-NEXT:    vst1.16 {d16}, [r0]
;
; CHECK-BE-LABEL: any_extend:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.i16 d16, #0x1
; CHECK-BE-NEXT:    vrev64.16 d17, d0
; CHECK-BE-NEXT:    vrev64.32 q9, q1
; CHECK-BE-NEXT:    vand d16, d17, d16
; CHECK-BE-NEXT:    vmovl.u16 q8, d16
; CHECK-BE-NEXT:    vsub.i32 q8, q8, q9
; CHECK-BE-NEXT:    vmovn.i32 d16, q8
; CHECK-BE-NEXT:    vst1.16 {d16}, [r0]
entry:
  %and.i186 = zext <4 x i1> %x to <4 x i32>
  %add.i185 = sub <4 x i32> %and.i186, %y
  %sub.i = sub <4 x i32> %add.i185, zeroinitializer
  %add.i = add <4 x i32> %sub.i, zeroinitializer
  %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
  tail call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
  unreachable
}

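; The remaining tests materialize an immediate splat and store it through
; llvm.arm.neon.vst1 with a different element size than the splat was built
; with; the vmov/vmvn encoding should follow the splat's byte pattern rather
; than the store's element type.
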
define arm_aapcs_vfpcc void @v_movi8_sti8(i8* %p) {
; CHECK-LABEL: v_movi8_sti8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.8 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %p, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @v_movi8_sti16(i8* %p) {
; CHECK-LABEL: v_movi8_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x i16>
  call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movi8_stf16(i8* %p) {
; CHECK-LABEL: v_movi8_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x half>
  call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movi8_sti32(i8* %p) {
; CHECK-LABEL: v_movi8_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi8_stf32(i8* %p) {
; CHECK-LABEL: v_movi8_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x float>
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi8_sti64(i8* %p) {
; CHECK-LABEL: v_movi8_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movi16_sti16(i8* %p) {
; CHECK-LABEL: v_movi16_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x1
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> <i16 1, i16 1, i16 1, i16 1>, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movi16_stf16(i8* %p) {
; CHECK-LABEL: v_movi16_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x1
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <4 x half>
  call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movi16_sti32(i8* %p) {
; CHECK-LABEL: v_movi16_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <2 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi16_stf32(i8* %p) {
; CHECK-LABEL: v_movi16_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <2 x float>
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi16_sti64(i8* %p) {
; CHECK-LABEL: v_movi16_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x1
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 1, i16 1, i16 1, i16 1> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movi32_sti32(i8* %p) {
; CHECK-LABEL: v_movi32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> <i32 1, i32 1>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi32_stf32(i8* %p) {
; CHECK-LABEL: v_movi32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <2 x i32> <i32 1, i32 1> to <2 x float>
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movi32_sti64(i8* %p) {
; CHECK-LABEL: v_movi32_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x1
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <2 x i32> <i32 1, i32 1> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movf32_stf32(i8* %p) {
; CHECK-LABEL: v_movf32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f32 d16, #1.000000e+00
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> <float 1.0, float 1.0>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movf32_sti32(i8* %p) {
; FIXME: We should use vmov.f32 instead of mov then vdup
; CHECK-LABEL: v_movf32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #1065353216
; CHECK-NEXT:    vdup.32 d16, r1
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <2 x float> <float 1.0, float 1.0> to <2 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movf32_sti64(i8* %p) {
; CHECK-LE-LABEL: v_movf32_sti64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    mov r1, #1065353216
; CHECK-LE-NEXT:    vdup.32 d16, r1
; CHECK-LE-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-LE-NEXT:    mov pc, lr
;
; FIXME: vrev is not needed here
; CHECK-BE-LABEL: v_movf32_sti64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    mov r1, #1065353216
; CHECK-BE-NEXT:    vdup.32 d16, r1
; CHECK-BE-NEXT:    vrev64.32 d16, d16
; CHECK-BE-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-BE-NEXT:    mov pc, lr
  %val = bitcast <2 x float> <float 1.0, float 1.0> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movi64_sti64(i8* %p) {
; CHECK-LABEL: v_movi64_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 d16, #0xff
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> <i64 255>, i32 8)
  ret void
}

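; Q-register versions of the immediate-splat stores above.
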
define arm_aapcs_vfpcc void @v_movQi8_sti8(i8* %p) {
; CHECK-LABEL: v_movQi8_sti8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %p, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi8_sti16(i8* %p) {
; CHECK-LABEL: v_movQi8_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <8 x i16>
  call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi8_stf16(i8* %p) {
; CHECK-LABEL: v_movQi8_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <8 x half>
  call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi8_sti32(i8* %p) {
; CHECK-LABEL: v_movQi8_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi8_stf32(i8* %p) {
; CHECK-LABEL: v_movQi8_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <4 x float>
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi8_sti64(i8* %p) {
; CHECK-LABEL: v_movQi8_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 q8, #0x1
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi16_sti16(i8* %p) {
; CHECK-LABEL: v_movQi16_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q8, #0x1
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi16_stf16(i8* %p) {
; CHECK-LABEL: v_movQi16_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q8, #0x1
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <8 x half>
  call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi16_sti32(i8* %p) {
; CHECK-LABEL: v_movQi16_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <4 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi16_stf32(i8* %p) {
; CHECK-LABEL: v_movQi16_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <4 x float>
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi16_sti64(i8* %p) {
; CHECK-LABEL: v_movQi16_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 q8, #0x1
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi32_sti32(i8* %p) {
; CHECK-LABEL: v_movQi32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi32_stf32(i8* %p) {
; CHECK-LABEL: v_movQi32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i32> <i32 1, i32 1, i32 1, i32 1> to <4 x float>
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi32_sti64(i8* %p) {
; CHECK-LABEL: v_movQi32_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x1
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i32> <i32 1, i32 1, i32 1, i32 1> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movQf32_stf32(i8* %p) {
; CHECK-LABEL: v_movQf32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f32 q8, #1.000000e+00
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQf32_sti32(i8* %p) {
; FIXME: We should use vmov.f32 instead of mov then vdup
; CHECK-LABEL: v_movQf32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov r1, #1065353216
; CHECK-NEXT:    vdup.32 q8, r1
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_movQf32_sti64(i8* %p) {
; CHECK-LE-LABEL: v_movQf32_sti64:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    mov r1, #1065353216
; CHECK-LE-NEXT:    vdup.32 q8, r1
; CHECK-LE-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-LE-NEXT:    mov pc, lr
;
; FIXME: vrev is not needed here
; CHECK-BE-LABEL: v_movQf32_sti64:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    mov r1, #1065353216
; CHECK-BE-NEXT:    vdup.32 q8, r1
; CHECK-BE-NEXT:    vrev64.32 q8, q8
; CHECK-BE-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-BE-NEXT:    mov pc, lr
  %val = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_movQi64_sti64(i8* %p) {
; CHECK-LABEL: v_movQi64_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q8, #0xff
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> <i64 255, i64 255>, i32 8)
  ret void
}

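; Inverted immediates (vmvn.i16/vmvn.i32) stored through vst1.
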
define arm_aapcs_vfpcc void @v_mvni16_sti16(i8* %p) {
; CHECK-LABEL: v_mvni16_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d16, #0xfe
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %p, <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281>, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni16_stf16(i8* %p) {
; CHECK-LABEL: v_mvni16_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d16, #0xfe
; CHECK-NEXT:    vst1.16 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <4 x half>
  call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* %p, <4 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni16_sti32(i8* %p) {
; CHECK-LABEL: v_mvni16_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d16, #0xfe
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <2 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni16_stf32(i8* %p) {
; CHECK-LABEL: v_mvni16_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d16, #0xfe
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <2 x float>
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni16_sti64(i8* %p) {
; CHECK-LABEL: v_mvni16_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 d16, #0xfe
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i16> <i16 65281, i16 65281, i16 65281, i16 65281> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni32_sti32(i8* %p) {
; CHECK-LABEL: v_mvni32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d16, #0xfe
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %p, <2 x i32> <i32 4294967041, i32 4294967041>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni32_stf32(i8* %p) {
; CHECK-LABEL: v_mvni32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d16, #0xfe
; CHECK-NEXT:    vst1.32 {d16}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <2 x i32> <i32 4294967041, i32 4294967041> to <2 x float>
  call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %p, <2 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvni32_sti64(i8* %p) {
; CHECK-LABEL: v_mvni32_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 d16, #0xfe
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <2 x i32> <i32 4294967041, i32 4294967041> to <1 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %p, <1 x i64> %val, i32 8)
  ret void
}


define arm_aapcs_vfpcc void @v_mvnQi16_sti16(i8* %p) {
; CHECK-LABEL: v_mvnQi16_sti16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 q8, #0xfe
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %p, <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281>, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi16_stf16(i8* %p) {
; CHECK-LABEL: v_mvnQi16_stf16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 q8, #0xfe
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <8 x half>
  call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* %p, <8 x half> %val, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi16_sti32(i8* %p) {
; CHECK-LABEL: v_mvnQi16_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 q8, #0xfe
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <4 x i32>
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi16_stf32(i8* %p) {
; CHECK-LABEL: v_mvnQi16_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 q8, #0xfe
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <4 x float>
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi16_sti64(i8* %p) {
; CHECK-LABEL: v_mvnQi16_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i16 q8, #0xfe
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <8 x i16> <i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281, i16 65281> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi32_sti32(i8* %p) {
; CHECK-LABEL: v_mvnQi32_sti32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 q8, #0xfe
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041>, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi32_stf32(i8* %p) {
; CHECK-LABEL: v_mvnQi32_stf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 q8, #0xfe
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041> to <4 x float>
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %val, i32 4)
  ret void
}

define arm_aapcs_vfpcc void @v_mvnQi32_sti64(i8* %p) {
; CHECK-LABEL: v_mvnQi32_sti64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmvn.i32 q8, #0xfe
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:64]
; CHECK-NEXT:    mov pc, lr
  %val = bitcast <4 x i32> <i32 4294967041, i32 4294967041, i32 4294967041, i32 4294967041> to <2 x i64>
  call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %p, <2 x i64> %val, i32 8)
  ret void
}

declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v4i16(i8*, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v4f16(i8*, <4 x half>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v2f32(i8*, <2 x float>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v1i64(i8*, <1 x i64>, i32) nounwind

declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v8f16(i8*, <8 x half>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v4i32(i8*, <4 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
declare void @llvm.arm.neon.vst1.p0i8.v2i64(i8*, <2 x i64>, i32) nounwind