; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Stores a zero <4 x i32> (align 2) to element 2 of a 16-byte stack array, then
; passes the array base to @func. The expected output addresses the slot
; directly off sp: vstrw.32 q0, [sp, #8].
; NOTE(review): the 16-byte store starts at byte offset 8 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrw32() {
; CHECK-LABEL: vstrw32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [sp, #8]
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i32], align 2
  %g = getelementptr inbounds [4 x i32], [4 x i32]* %d, i32 0, i32 2
  %b = bitcast i32* %g to <4 x i32>*
  store <4 x i32> zeroinitializer, <4 x i32>* %b, align 2
  %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i32*)*)(i32* %arraydecay)
  ret void
}
; Stores a zero <8 x i16> (align 2) to element 2 of a 16-byte stack array, then
; passes the array base to @func. The expected output addresses the slot
; directly off sp: vstrh.16 q0, [sp, #4].
; NOTE(review): the 16-byte store starts at byte offset 4 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrh16() {
; CHECK-LABEL: vstrh16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrh.16 q0, [sp, #4]
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [8 x i16], align 2
  %g = getelementptr inbounds [8 x i16], [8 x i16]* %d, i32 0, i32 2
  %b = bitcast i16* %g to <8 x i16>*
  store <8 x i16> zeroinitializer, <8 x i16>* %b, align 2
  %arraydecay = getelementptr inbounds [8 x i16], [8 x i16]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i16*)*)(i16* %arraydecay)
  ret void
}
; Stores a zero <16 x i8> (align 2) to element 2 of a 16-byte stack array, then
; passes the array base to @func. The expected output uses a halfword store
; addressed directly off sp: vstrh.16 q0, [sp, #2].
; NOTE(review): the 16-byte store starts at byte offset 2 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrb8() {
; CHECK-LABEL: vstrb8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrh.16 q0, [sp, #2]
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [16 x i8], align 2
  %g = getelementptr inbounds [16 x i8], [16 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <16 x i8>*
  store <16 x i8> zeroinitializer, <16 x i8>* %b, align 2
  %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  ret void
}
; Stores a <4 x i16> splat of 6 (align 2) to element 2 of an 8-byte stack
; array, then passes the array base to @func. The expected output copies sp
; into r0 and stores through it: vstrh.32 q0, [r0, #4], then reloads r0 with sp
; for the call argument.
; NOTE(review): the 8-byte store starts at byte offset 4 of the 8-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrh32() {
; CHECK-LABEL: vstrh32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vmov.i32 q0, #0x6
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrh.32 q0, [r0, #4]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i16], align 2
  %g = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 2
  %b = bitcast i16* %g to <4 x i16>*
  store <4 x i16> <i16 6, i16 6, i16 6, i16 6>, <4 x i16>* %b, align 2
  %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i16*)*)(i16* %arraydecay)
  ret void
}
; Stores a <4 x i8> splat of 6 (align 2) to element 2 of a 4-byte stack array,
; then passes the array base to @func. In the expected output the array base is
; sp+4 (add r0, sp, #4 for the call) and the store goes through a copy of sp:
; vstrb.32 q0, [r0, #6].
; NOTE(review): the 4-byte store starts at byte offset 2 of the 4-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrb32() {
; CHECK-LABEL: vstrb32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vmov.i32 q0, #0x6
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.32 q0, [r0, #6]
; CHECK-NEXT:    add r0, sp, #4
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i8], align 2
  %g = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <4 x i8>*
  store <4 x i8> <i8 6, i8 6, i8 6, i8 6>, <4 x i8>* %b, align 2
  %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  ret void
}
; Stores a zero <8 x i8> (align 2) to element 2 of an 8-byte stack array, then
; passes the array base to @func. The expected output copies sp into r0 and
; stores through it: vstrb.16 q0, [r0, #2], then reloads r0 with sp for the
; call argument.
; NOTE(review): the 8-byte store starts at byte offset 2 of the 8-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc void @vstrb16() {
; CHECK-LABEL: vstrb16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.16 q0, [r0, #2]
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [8 x i8], align 2
  %g = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <8 x i8>*
  store <8 x i8> zeroinitializer, <8 x i8>* %b, align 2
  %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  ret void
}


; Passes the base of a 16-byte stack array to @func, then loads a <4 x i32>
; (align 2) from element 2 and returns it. The expected output addresses the
; slot directly off sp: vldrw.u32 q0, [sp, #8].
; NOTE(review): the 16-byte load starts at byte offset 8 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <4 x i32> @vldrw32() {
; CHECK-LABEL: vldrw32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    vldrw.u32 q0, [sp, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i32], align 2
  %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i32*)*)(i32* %arraydecay)
  %g = getelementptr inbounds [4 x i32], [4 x i32]* %d, i32 0, i32 2
  %b = bitcast i32* %g to <4 x i32>*
  %l = load <4 x i32>, <4 x i32>* %b, align 2
  ret <4 x i32> %l
}
; Passes the base of a 16-byte stack array to @func, then loads an <8 x i16>
; (align 2) from element 2 and returns it. The expected output addresses the
; slot directly off sp: vldrh.u16 q0, [sp, #4].
; NOTE(review): the 16-byte load starts at byte offset 4 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <8 x i16> @vldrh16() {
; CHECK-LABEL: vldrh16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    vldrh.u16 q0, [sp, #4]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [8 x i16], align 2
  %arraydecay = getelementptr inbounds [8 x i16], [8 x i16]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i16*)*)(i16* %arraydecay)
  %g = getelementptr inbounds [8 x i16], [8 x i16]* %d, i32 0, i32 2
  %b = bitcast i16* %g to <8 x i16>*
  %l = load <8 x i16>, <8 x i16>* %b, align 2
  ret <8 x i16> %l
}
; Passes the base of a 16-byte stack array to @func, then loads a <16 x i8>
; (align 2) from element 2 and returns it. The expected output uses a halfword
; load addressed directly off sp: vldrh.u16 q0, [sp, #2].
; NOTE(review): the 16-byte load starts at byte offset 2 of the 16-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <16 x i8> @vldrb8() {
; CHECK-LABEL: vldrb8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    vldrh.u16 q0, [sp, #2]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [16 x i8], align 2
  %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  %g = getelementptr inbounds [16 x i8], [16 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <16 x i8>*
  %l = load <16 x i8>, <16 x i8>* %b, align 2
  ret <16 x i8> %l
}
; Passes the base of an 8-byte stack array to @func, then loads a <4 x i16>
; (align 2) from element 2 and returns it. The expected output copies sp into
; r0 after the call and loads through it: vldrh.u32 q0, [r0, #4].
; NOTE(review): the 8-byte load starts at byte offset 4 of the 8-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <4 x i16> @vldrh32() {
; CHECK-LABEL: vldrh32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrh.u32 q0, [r0, #4]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i16], align 2
  %arraydecay = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i16*)*)(i16* %arraydecay)
  %g = getelementptr inbounds [4 x i16], [4 x i16]* %d, i32 0, i32 2
  %b = bitcast i16* %g to <4 x i16>*
  %l = load <4 x i16>, <4 x i16>* %b, align 2
  ret <4 x i16> %l
}
; Passes the base of a 4-byte stack array to @func, then loads a <4 x i8>
; (align 2) from element 2 and returns it. In the expected output the array
; base is sp+4 (add r0, sp, #4 for the call) and the load goes through a copy
; of sp: vldrb.u32 q0, [r0, #6].
; NOTE(review): the 4-byte load starts at byte offset 2 of the 4-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <4 x i8> @vldrb32() {
; CHECK-LABEL: vldrb32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    add r0, sp, #4
; CHECK-NEXT:    bl func
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrb.u32 q0, [r0, #6]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [4 x i8], align 2
  %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  %g = getelementptr inbounds [4 x i8], [4 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <4 x i8>*
  %l = load <4 x i8>, <4 x i8>* %b, align 2
  ret <4 x i8> %l
}
; Passes the base of an 8-byte stack array to @func, then loads an <8 x i8>
; (align 2) from element 2 and returns it. The expected output copies sp into
; r0 after the call and loads through it: vldrb.u16 q0, [r0, #2].
; NOTE(review): the 8-byte load starts at byte offset 2 of the 8-byte array,
; so it extends past the end of %d - presumably deliberate for this codegen
; test; confirm before changing, as it would alter the generated assertions.
define arm_aapcs_vfpcc <8 x i8> @vldrb16() {
; CHECK-LABEL: vldrb16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    bl func
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vldrb.u16 q0, [r0, #2]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r7, pc}
entry:
  %d = alloca [8 x i8], align 2
  %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 0
  call arm_aapcs_vfpcc void bitcast (void (...)* @func to void (i8*)*)(i8* %arraydecay)
  %g = getelementptr inbounds [8 x i8], [8 x i8]* %d, i32 0, i32 2
  %b = bitcast i8* %g to <8 x i8>*
  %l = load <8 x i8>, <8 x i8>* %b, align 2
  ret <8 x i8> %l
}
; External variadic callee. Every test in this file bitcasts it to a
; single-pointer signature and passes the stack array base to it.
declare dso_local arm_aapcs_vfpcc void @func(...)