; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=hexagon < %s | FileCheck %s
3
; f0: copy one <128 x i8> vector (128 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect a single packet in which an aligned load (v0.cur = vmem) feeds an
; aligned store (vmem).
define void @f0(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
; CHECK-LABEL: f0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1
  %vec = load <128 x i8>, <128 x i8>* %src, align 128
  %dst = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2
  store <128 x i8> %vec, <128 x i8>* %dst, align 128
  ret void
}
18
; f1: copy one <64 x i16> vector (128 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect a single packet in which an aligned load (v0.cur = vmem) feeds an
; aligned store (vmem).
define void @f1(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
; CHECK-LABEL: f1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1
  %vec = load <64 x i16>, <64 x i16>* %src, align 128
  %dst = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2
  store <64 x i16> %vec, <64 x i16>* %dst, align 128
  ret void
}
33
; f2: copy one <32 x i32> vector (128 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect a single packet in which an aligned load (v0.cur = vmem) feeds an
; aligned store (vmem).
define void @f2(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
; CHECK-LABEL: f2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1
  %vec = load <32 x i32>, <32 x i32>* %src, align 128
  %dst = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2
  store <32 x i32> %vec, <32 x i32>* %dst, align 128
  ret void
}
48
; f3: copy one <64 x half> vector (128 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect a single packet in which an aligned load (v0.cur = vmem) feeds an
; aligned store (vmem).
define void @f3(<64 x half>* %a0, <64 x half>* %a1) #0 {
; CHECK-LABEL: f3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x half>, <64 x half>* %a0, i32 1
  %vec = load <64 x half>, <64 x half>* %src, align 128
  %dst = getelementptr <64 x half>, <64 x half>* %a1, i32 2
  store <64 x half> %vec, <64 x half>* %dst, align 128
  ret void
}
63
; f4: copy one <32 x float> vector (128 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect a single packet in which an aligned load (v0.cur = vmem) feeds an
; aligned store (vmem).
define void @f4(<32 x float>* %a0, <32 x float>* %a1) #0 {
; CHECK-LABEL: f4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     v0.cur = vmem(r0+#1)
; CHECK-NEXT:     vmem(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <32 x float>, <32 x float>* %a0, i32 1
  %vec = load <32 x float>, <32 x float>* %src, align 128
  %dst = getelementptr <32 x float>, <32 x float>* %a1, i32 2
  store <32 x float> %vec, <32 x float>* %dst, align 128
  ret void
}
78
; f5: copy one <128 x i8> vector (128 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect an unaligned load (vmemu) in one packet followed by an
; unaligned store (vmemu) in the returning packet.
define void @f5(<128 x i8>* %a0, <128 x i8>* %a1) #0 {
; CHECK-LABEL: f5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1
  %vec = load <128 x i8>, <128 x i8>* %src, align 1
  %dst = getelementptr <128 x i8>, <128 x i8>* %a1, i32 2
  store <128 x i8> %vec, <128 x i8>* %dst, align 1
  ret void
}
95
; f6: copy one <64 x i16> vector (128 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect an unaligned load (vmemu) in one packet followed by an
; unaligned store (vmemu) in the returning packet.
define void @f6(<64 x i16>* %a0, <64 x i16>* %a1) #0 {
; CHECK-LABEL: f6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x i16>, <64 x i16>* %a0, i32 1
  %vec = load <64 x i16>, <64 x i16>* %src, align 1
  %dst = getelementptr <64 x i16>, <64 x i16>* %a1, i32 2
  store <64 x i16> %vec, <64 x i16>* %dst, align 1
  ret void
}
112
; f7: copy one <32 x i32> vector (128 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect an unaligned load (vmemu) in one packet followed by an
; unaligned store (vmemu) in the returning packet.
define void @f7(<32 x i32>* %a0, <32 x i32>* %a1) #0 {
; CHECK-LABEL: f7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <32 x i32>, <32 x i32>* %a0, i32 1
  %vec = load <32 x i32>, <32 x i32>* %src, align 1
  %dst = getelementptr <32 x i32>, <32 x i32>* %a1, i32 2
  store <32 x i32> %vec, <32 x i32>* %dst, align 1
  ret void
}
129
; f8: copy one <64 x half> vector (128 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect an unaligned load (vmemu) in one packet followed by an
; unaligned store (vmemu) in the returning packet.
define void @f8(<64 x half>* %a0, <64 x half>* %a1) #0 {
; CHECK-LABEL: f8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x half>, <64 x half>* %a0, i32 1
  %vec = load <64 x half>, <64 x half>* %src, align 1
  %dst = getelementptr <64 x half>, <64 x half>* %a1, i32 2
  store <64 x half> %vec, <64 x half>* %dst, align 1
  ret void
}
146
; f9: copy one <32 x float> vector (128 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect an unaligned load (vmemu) in one packet followed by an
; unaligned store (vmemu) in the returning packet.
define void @f9(<32 x float>* %a0, <32 x float>* %a1) #0 {
; CHECK-LABEL: f9:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#1)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#2) = v0
; CHECK-NEXT:    }
  %src = getelementptr <32 x float>, <32 x float>* %a0, i32 1
  %vec = load <32 x float>, <32 x float>* %src, align 1
  %dst = getelementptr <32 x float>, <32 x float>* %a1, i32 2
  store <32 x float> %vec, <32 x float>* %dst, align 1
  ret void
}
163
; f10: copy one <256 x i8> value (256 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect the access to be split into two aligned loads (vmem at r0+#2 and
; r0+#3) and two aligned stores (vmem at r1+#4 and r1+#5).
define void @f10(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
; CHECK-LABEL: f10:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#5) = v1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#4) = v0
; CHECK-NEXT:    }
  %src = getelementptr <256 x i8>, <256 x i8>* %a0, i32 1
  %vec = load <256 x i8>, <256 x i8>* %src, align 128
  %dst = getelementptr <256 x i8>, <256 x i8>* %a1, i32 2
  store <256 x i8> %vec, <256 x i8>* %dst, align 128
  ret void
}
184
; f11: copy one <128 x i16> value (256 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect the access to be split into two aligned loads (vmem at r0+#2 and
; r0+#3) and two aligned stores (vmem at r1+#4 and r1+#5).
define void @f11(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
; CHECK-LABEL: f11:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#5) = v1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#4) = v0
; CHECK-NEXT:    }
  %src = getelementptr <128 x i16>, <128 x i16>* %a0, i32 1
  %vec = load <128 x i16>, <128 x i16>* %src, align 128
  %dst = getelementptr <128 x i16>, <128 x i16>* %a1, i32 2
  store <128 x i16> %vec, <128 x i16>* %dst, align 128
  ret void
}
205
; f12: copy one <64 x i32> value (256 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect the access to be split into two aligned loads (vmem at r0+#2 and
; r0+#3) and two aligned stores (vmem at r1+#4 and r1+#5).
define void @f12(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
; CHECK-LABEL: f12:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#5) = v1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#4) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x i32>, <64 x i32>* %a0, i32 1
  %vec = load <64 x i32>, <64 x i32>* %src, align 128
  %dst = getelementptr <64 x i32>, <64 x i32>* %a1, i32 2
  store <64 x i32> %vec, <64 x i32>* %dst, align 128
  ret void
}
226
; f13: copy one <128 x half> value (256 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect the access to be split into two aligned loads (vmem at r0+#2 and
; r0+#3) and two aligned stores (vmem at r1+#4 and r1+#5).
define void @f13(<128 x half>* %a0, <128 x half>* %a1) #0 {
; CHECK-LABEL: f13:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#5) = v1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#4) = v0
; CHECK-NEXT:    }
  %src = getelementptr <128 x half>, <128 x half>* %a0, i32 1
  %vec = load <128 x half>, <128 x half>* %src, align 128
  %dst = getelementptr <128 x half>, <128 x half>* %a1, i32 2
  store <128 x half> %vec, <128 x half>* %dst, align 128
  ret void
}
247
; f14: copy one <64 x float> value (256 bytes) with 128-byte-aligned accesses.
; Reads element 1 of %a0 and writes it to element 2 of %a1. The assertions
; expect the access to be split into two aligned loads (vmem at r0+#2 and
; r0+#3) and two aligned stores (vmem at r1+#4 and r1+#5).
define void @f14(<64 x float>* %a0, <64 x float>* %a1) #0 {
; CHECK-LABEL: f14:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmem(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1.cur = vmem(r0+#3)
; CHECK-NEXT:     vmem(r1+#5) = v1
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmem(r1+#4) = v0
; CHECK-NEXT:    }
  %src = getelementptr <64 x float>, <64 x float>* %a0, i32 1
  %vec = load <64 x float>, <64 x float>* %src, align 128
  %dst = getelementptr <64 x float>, <64 x float>* %a1, i32 2
  store <64 x float> %vec, <64 x float>* %dst, align 128
  ret void
}
268
; f15: copy one <256 x i8> value (256 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect two unaligned loads (vmemu at r0+#3 and r0+#2) followed
; by two unaligned stores (vmemu at r1+#5 and r1+#4), one per packet.
define void @f15(<256 x i8>* %a0, <256 x i8>* %a1) #0 {
; CHECK-LABEL: f15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmemu(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmemu(r1+#5) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#4) = v1
; CHECK-NEXT:    }
  %src = getelementptr <256 x i8>, <256 x i8>* %a0, i32 1
  %vec = load <256 x i8>, <256 x i8>* %src, align 1
  %dst = getelementptr <256 x i8>, <256 x i8>* %a1, i32 2
  store <256 x i8> %vec, <256 x i8>* %dst, align 1
  ret void
}
291
; f16: copy one <128 x i16> value (256 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect two unaligned loads (vmemu at r0+#3 and r0+#2) followed
; by two unaligned stores (vmemu at r1+#5 and r1+#4), one per packet.
define void @f16(<128 x i16>* %a0, <128 x i16>* %a1) #0 {
; CHECK-LABEL: f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmemu(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmemu(r1+#5) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#4) = v1
; CHECK-NEXT:    }
  %src = getelementptr <128 x i16>, <128 x i16>* %a0, i32 1
  %vec = load <128 x i16>, <128 x i16>* %src, align 1
  %dst = getelementptr <128 x i16>, <128 x i16>* %a1, i32 2
  store <128 x i16> %vec, <128 x i16>* %dst, align 1
  ret void
}
314
; f17: copy one <64 x i32> value (256 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect two unaligned loads (vmemu at r0+#3 and r0+#2) followed
; by two unaligned stores (vmemu at r1+#5 and r1+#4), one per packet.
define void @f17(<64 x i32>* %a0, <64 x i32>* %a1) #0 {
; CHECK-LABEL: f17:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmemu(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmemu(r1+#5) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#4) = v1
; CHECK-NEXT:    }
  %src = getelementptr <64 x i32>, <64 x i32>* %a0, i32 1
  %vec = load <64 x i32>, <64 x i32>* %src, align 1
  %dst = getelementptr <64 x i32>, <64 x i32>* %a1, i32 2
  store <64 x i32> %vec, <64 x i32>* %dst, align 1
  ret void
}
337
; f18: copy one <128 x half> value (256 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect two unaligned loads (vmemu at r0+#3 and r0+#2) followed
; by two unaligned stores (vmemu at r1+#5 and r1+#4), one per packet.
define void @f18(<128 x half>* %a0, <128 x half>* %a1) #0 {
; CHECK-LABEL: f18:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmemu(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmemu(r1+#5) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#4) = v1
; CHECK-NEXT:    }
  %src = getelementptr <128 x half>, <128 x half>* %a0, i32 1
  %vec = load <128 x half>, <128 x half>* %src, align 1
  %dst = getelementptr <128 x half>, <128 x half>* %a1, i32 2
  store <128 x half> %vec, <128 x half>* %dst, align 1
  ret void
}
360
; f19: copy one <64 x float> value (256 bytes) with byte-aligned (align 1)
; accesses. Reads element 1 of %a0 and writes it to element 2 of %a1. The
; assertions expect two unaligned loads (vmemu at r0+#3 and r0+#2) followed
; by two unaligned stores (vmemu at r1+#5 and r1+#4), one per packet.
define void @f19(<64 x float>* %a0, <64 x float>* %a1) #0 {
; CHECK-LABEL: f19:
; CHECK:       // %bb.0:
; CHECK-NEXT:    {
; CHECK-NEXT:     v0 = vmemu(r0+#3)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     v1 = vmemu(r0+#2)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     vmemu(r1+#5) = v0
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr r31
; CHECK-NEXT:     vmemu(r1+#4) = v1
; CHECK-NEXT:    }
  %src = getelementptr <64 x float>, <64 x float>* %a0, i32 1
  %vec = load <64 x float>, <64 x float>* %src, align 1
  %dst = getelementptr <64 x float>, <64 x float>* %a1, i32 2
  store <64 x float> %vec, <64 x float>* %dst, align 1
  ret void
}
383
384
; Attribute group #0, referenced by the function definitions in this file:
; nounwind, target CPU hexagonv69, with the HVX v69, 128-byte HVX vector
; length, and HVX qfloat target features enabled.
attributes #0 = { nounwind "target-cpu"="hexagonv69" "target-features"="+hvxv69,+hvx-length128b,+hvx-qfloat" }
386