1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -mcpu=generic -aarch64-neon-syntax=apple -mattr="+fullfp16" | FileCheck %s
3
; Inserts %a into lane 0 of <undef, 0.0, 0.0, 0.0> and stores the resulting
; <4 x float> through %x with 16-byte alignment. The expected assembly
; materializes an all-zero vector, copies s0 into lane 0, and stores the
; whole q register.
4define void @test0f(float* nocapture %x, float %a) #0 {
5; CHECK-LABEL: test0f:
6; CHECK:       // %bb.0: // %entry
7; CHECK-NEXT:    movi.2d v1, #0000000000000000
8; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
9; CHECK-NEXT:    mov.s v1[0], v0[0]
10; CHECK-NEXT:    str q1, [x0]
11; CHECK-NEXT:    ret
12entry:
13  %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
14  %1 = bitcast float* %x to <4 x float>*
15  store <4 x float> %0, <4 x float>* %1, align 16
16  ret void
17}
18
; Same shape as test0f, but the constant lanes 1..3 are 1.0 instead of 0.0.
; The expected assembly builds the constant with a 4-lane fmov of 1.0, then
; overwrites lane 0 with s0 before the store.
19define void @test1f(float* nocapture %x, float %a) #0 {
20; CHECK-LABEL: test1f:
21; CHECK:       // %bb.0: // %entry
22; CHECK-NEXT:    fmov.4s v1, #1.00000000
23; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
24; CHECK-NEXT:    mov.s v1[0], v0[0]
25; CHECK-NEXT:    str q1, [x0]
26; CHECK-NEXT:    ret
27entry:
28  %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
29  %1 = bitcast float* %x to <4 x float>*
30  store <4 x float> %0, <4 x float>* %1, align 16
31  ret void
32}
33
; Single insert of %a into lane 14 of an all-zero <16 x i8>. The expected
; assembly zeroes v0 and performs one byte-lane move from w0.
34define <16 x i8> @test_insert_v16i8_insert_1(i8 %a) {
35; CHECK-LABEL: test_insert_v16i8_insert_1:
36; CHECK:       // %bb.0:
37; CHECK-NEXT:    movi.2d v0, #0000000000000000
38; CHECK-NEXT:    mov.b v0[14], w0
39; CHECK-NEXT:    ret
40  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 14
41  ret <16 x i8> %v.0
42}
43
; Two inserts of %a (lane 2, then lane 1) into an all-zero <16 x i8>. The
; expected assembly zeroes v0 and emits one byte-lane move per inserted lane,
; in ascending lane order.
44define <16 x i8> @test_insert_v16i8_insert_2(i8 %a) {
45; CHECK-LABEL: test_insert_v16i8_insert_2:
46; CHECK:       // %bb.0:
47; CHECK-NEXT:    movi.2d v0, #0000000000000000
48; CHECK-NEXT:    mov.b v0[1], w0
49; CHECK-NEXT:    mov.b v0[2], w0
50; CHECK-NEXT:    ret
51  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 2
52  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
53  ret <16 x i8> %v.1
54}
55
; The base vector is undef except for constant zeros in lanes 5 and 9. %a is
; inserted into lanes 0-4, 6, 7 and 10-15 of the returned value. The expected
; assembly splats w0 across all 16 lanes and then zeroes lanes 5 and 9 from
; wzr.
; NOTE(review): %v.8 is never used -- %v.10 is built from %v.7 -- so lane 8 of
; the returned vector stays undef. This looks like a copy/paste slip; confirm
; whether %v.10 was meant to chain from %v.8.
56define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
57; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base:
58; CHECK:       // %bb.0:
59; CHECK-NEXT:    dup.16b v0, w0
60; CHECK-NEXT:    mov.b v0[5], wzr
61; CHECK-NEXT:    mov.b v0[9], wzr
62; CHECK-NEXT:    ret
63  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>  , i8 %a, i32 0
64  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
65  %v.2 = insertelement <16 x i8> %v.1, i8 %a, i32 2
66  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
67  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
68  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
69  %v.7 = insertelement <16 x i8> %v.6, i8 %a, i32 7
70  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
71  %v.10 = insertelement <16 x i8> %v.7, i8 %a, i32 10
72  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
73  %v.12 = insertelement <16 x i8> %v.11, i8 %a, i32 12
74  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
75  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
76  %v.15 = insertelement <16 x i8> %v.14, i8 %a, i32 15
77  ret <16 x i8> %v.15
78}
79
; Variant of the undef-base test with two scalar sources: the base has zeros
; in lanes 5 and 9, %b goes into lanes 2, 7, 12 and 15, and %a goes into the
; remaining inserted lanes. The expected assembly splats w0 (the more common
; value), then patches the %b lanes from w1 and the zero lanes from wzr, in
; ascending lane order.
; NOTE(review): %v.8 is never used -- %v.10 is built from %v.7 -- so lane 8 of
; the returned vector stays undef; confirm this is intended.
; NOTE(review): "valeus" in the function name looks like a typo for "values";
; renaming would also require updating the label assertion below.
80define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a, i8 %b) {
81; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_valeus:
82; CHECK:       // %bb.0:
83; CHECK-NEXT:    dup.16b v0, w0
84; CHECK-NEXT:    mov.b v0[2], w1
85; CHECK-NEXT:    mov.b v0[5], wzr
86; CHECK-NEXT:    mov.b v0[7], w1
87; CHECK-NEXT:    mov.b v0[9], wzr
88; CHECK-NEXT:    mov.b v0[12], w1
89; CHECK-NEXT:    mov.b v0[15], w1
90; CHECK-NEXT:    ret
91  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>  , i8 %a, i32 0
92  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
93  %v.2 = insertelement <16 x i8> %v.1, i8 %b, i32 2
94  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
95  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
96  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
97  %v.7 = insertelement <16 x i8> %v.6, i8 %b, i32 7
98  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
99  %v.10 = insertelement <16 x i8> %v.7, i8 %a, i32 10
100  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
101  %v.12 = insertelement <16 x i8> %v.11, i8 %b, i32 12
102  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
103  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
104  %v.15 = insertelement <16 x i8> %v.14, i8 %b, i32 15
105  ret <16 x i8> %v.15
106}
107
; The base <8 x half> is undef except for 0.0 in lane 7; %a is inserted into
; lanes 0-6. The expected assembly splats h0 across all 8 lanes and then
; zeroes lane 7 from wzr.
108define <8 x half> @test_insert_v8f16_insert_1(half %a) {
109; CHECK-LABEL: test_insert_v8f16_insert_1:
110; CHECK:       // %bb.0:
111; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
112; CHECK-NEXT:    dup.8h v0, v0[0]
113; CHECK-NEXT:    mov.h v0[7], wzr
114; CHECK-NEXT:    ret
115  %v.0 = insertelement <8 x half> <half undef, half undef, half undef, half undef, half undef, half undef, half undef, half 0.0>, half %a, i32 0
116  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
117  %v.2 = insertelement <8 x half> %v.1, half %a, i32 2
118  %v.3 = insertelement <8 x half> %v.2, half %a, i32 3
119  %v.4 = insertelement <8 x half> %v.3, half %a, i32 4
120  %v.5 = insertelement <8 x half> %v.4, half %a, i32 5
121  %v.6 = insertelement <8 x half> %v.5, half %a, i32 6
122  ret <8 x half> %v.6
123}
124
125
; Two inserts of %a (lane 2, then lane 1) into an all-zero <8 x half>. The
; expected assembly zeroes a scratch register, copies h0 into lanes 1 and 2,
; and moves the result into v0 for the return.
126define <8 x half> @test_insert_v8f16_insert_2(half %a) {
127; CHECK-LABEL: test_insert_v8f16_insert_2:
128; CHECK:       // %bb.0:
129; CHECK-NEXT:    movi.2d v1, #0000000000000000
130; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
131; CHECK-NEXT:    mov.h v1[1], v0[0]
132; CHECK-NEXT:    mov.h v1[2], v0[0]
133; CHECK-NEXT:    mov.16b v0, v1
134; CHECK-NEXT:    ret
135  %v.0 = insertelement <8 x half> zeroinitializer, half %a, i32 2
136  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
137  ret <8 x half> %v.1
138}
139
; The base <8 x i16> has constant zeros in lanes 3 and 7 and undef elsewhere;
; %a is inserted into lanes 0, 1, 2, 4, 5 and 6. The expected assembly splats
; w0 and then zeroes the two constant lanes from wzr.
140define <8 x i16> @test_insert_v8i16_insert_2(i16 %a) {
141; CHECK-LABEL: test_insert_v8i16_insert_2:
142; CHECK:       // %bb.0:
143; CHECK-NEXT:    dup.8h v0, w0
144; CHECK-NEXT:    mov.h v0[3], wzr
145; CHECK-NEXT:    mov.h v0[7], wzr
146; CHECK-NEXT:    ret
147  %v.0 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
148  %v.1 = insertelement <8 x i16> %v.0, i16 %a, i32 1
149  %v.2 = insertelement <8 x i16> %v.1, i16 %a, i32 2
150  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
151  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
152  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
153  ret <8 x i16> %v.5
154}
155
; The base <8 x i16> has constant zeros in lanes 1, 3 and 7; %a is inserted
; into lanes 0, 2, 4, 5 and 6 (five lanes of %a versus three zeros). The
; expected assembly still splats w0 first and then zeroes the three constant
; lanes from wzr.
156define <8 x i16> @test_insert_v8i16_insert_3(i16 %a) {
157; CHECK-LABEL: test_insert_v8i16_insert_3:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    dup.8h v0, w0
160; CHECK-NEXT:    mov.h v0[1], wzr
161; CHECK-NEXT:    mov.h v0[3], wzr
162; CHECK-NEXT:    mov.h v0[7], wzr
163; CHECK-NEXT:    ret
164  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
165  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
166  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
167  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
168  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
169  ret <8 x i16> %v.5
170}
171
; The base <8 x i16> has constant zeros in lanes 1, 3, 6 and 7; %a is inserted
; into lanes 0, 2, 4 and 5 (four of each). Here the expected assembly starts
; from an all-zero vector and inserts w0 into the four %a lanes, rather than
; splatting w0 and zeroing.
172define <8 x i16> @test_insert_v8i16_insert_4(i16 %a) {
173; CHECK-LABEL: test_insert_v8i16_insert_4:
174; CHECK:       // %bb.0:
175; CHECK-NEXT:    movi.2d v0, #0000000000000000
176; CHECK-NEXT:    mov.h v0[0], w0
177; CHECK-NEXT:    mov.h v0[2], w0
178; CHECK-NEXT:    mov.h v0[4], w0
179; CHECK-NEXT:    mov.h v0[5], w0
180; CHECK-NEXT:    ret
181  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
182  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
183  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
184  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
185  ret <8 x i16> %v.4
186}
187
; The base <8 x i16> has constant zeros in lanes 1, 2, 3, 6 and 7; %a is
; inserted into lanes 0, 4 and 5. The expected assembly zeroes v0 and inserts
; w0 into the three %a lanes.
188define <8 x i16> @test_insert_v8i16_insert_5(i16 %a) {
189; CHECK-LABEL: test_insert_v8i16_insert_5:
190; CHECK:       // %bb.0:
191; CHECK-NEXT:    movi.2d v0, #0000000000000000
192; CHECK-NEXT:    mov.h v0[0], w0
193; CHECK-NEXT:    mov.h v0[4], w0
194; CHECK-NEXT:    mov.h v0[5], w0
195; CHECK-NEXT:    ret
196  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
197  %v.3 = insertelement <8 x i16> %v.0, i16 %a, i32 4
198  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
199  ret <8 x i16> %v.4
200}
201
; Inserts %a into lane 1 of the 64-bit vector <0.0, undef>. The expected
; assembly zeroes d1, copies s0 into lane 1 of v1, and moves d1 into d0 for
; the return.
202define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
203; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
204; CHECK:       // %bb.0:
205; CHECK-NEXT:    movi d1, #0000000000000000
206; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
207; CHECK-NEXT:    mov.s v1[1], v0[0]
208; CHECK-NEXT:    fmov d0, d1
209; CHECK-NEXT:    ret
210  %v.0 = insertelement <2 x float> <float 0.000000e+00, float undef>, float %a, i32 1
211  ret <2 x float> %v.0
212}
213
; The base <4 x float> is undef except for 0.0 in lane 3; %a is inserted into
; lanes 0, 1 and 2. The expected assembly splats s0 across all four lanes and
; then zeroes lane 3 from wzr.
214define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
215; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
216; CHECK:       // %bb.0:
217; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
218; CHECK-NEXT:    dup.4s v0, v0[0]
219; CHECK-NEXT:    mov.s v0[3], wzr
220; CHECK-NEXT:    ret
221  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %a, i32 0
222  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
223  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
224  ret <4 x float> %v.2
225}
226
; The base <4 x float> is entirely undef; %a is inserted into lanes 0, 1 and
; 2, leaving lane 3 undef. The expected assembly is a single splat of s0 with
; no further lane fix-ups.
227define <4 x float> @test_insert_3_f32_undef(float %a) {
228; CHECK-LABEL: test_insert_3_f32_undef:
229; CHECK:       // %bb.0:
230; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
231; CHECK-NEXT:    dup.4s v0, v0[0]
232; CHECK-NEXT:    ret
233  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
234  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
235  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
236  ret <4 x float> %v.2
237}
238
; The base <4 x float> is <undef, 0.0, undef, 0.0>; %a is inserted into lanes
; 0 and 2 (two lanes of %a, two zeros). The expected assembly zeroes a scratch
; register, copies s0 into lanes 0 and 2, and moves the result into v0.
239define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
240; CHECK-LABEL: test_insert_2_f32_undef_zero:
241; CHECK:       // %bb.0:
242; CHECK-NEXT:    movi.2d v1, #0000000000000000
243; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
244; CHECK-NEXT:    mov.s v1[0], v0[0]
245; CHECK-NEXT:    mov.s v1[2], v0[0]
246; CHECK-NEXT:    mov.16b v0, v1
247; CHECK-NEXT:    ret
248  %v.0 = insertelement <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, float %a, i32 0
249  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
250  ret <4 x float> %v.1
251}
252
; Inserts %a into lane 0 of <undef, 0.0> (<2 x double>). The expected assembly
; zeroes a scratch register, copies d0 into its lane 0, and moves the result
; into v0.
253define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
254; CHECK-LABEL: test_insert_v2f64_undef_insert1:
255; CHECK:       // %bb.0:
256; CHECK-NEXT:    movi.2d v1, #0000000000000000
257; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
258; CHECK-NEXT:    mov.d v1[0], v0[0]
259; CHECK-NEXT:    mov.16b v0, v1
260; CHECK-NEXT:    ret
261  %v.0 = insertelement <2 x double > <double undef, double 0.000000e+00>, double %a, i32 0
262  ret <2 x double> %v.0
263}
264
; Inserts %a into lanes 0 and 2 of a non-constant vector argument %b. The
; expected assembly writes s0 into lanes 0 and 2 of v1 (where %b arrives) and
; then copies v1 into v0 for the return.
265define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
266; CHECK-LABEL: test_insert_2_f32_var:
267; CHECK:       // %bb.0:
268; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
269; CHECK-NEXT:    mov.s v1[0], v0[0]
270; CHECK-NEXT:    mov.s v1[2], v0[0]
271; CHECK-NEXT:    mov.16b v0, v1
272; CHECK-NEXT:    ret
273  %v.0 = insertelement <4 x float> %b, float %a, i32 0
274  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
275  ret <4 x float> %v.1
276}
277
; Inserts the constant i16 0 into lane 5 of the vector argument. The expected
; assembly is a single halfword lane move from wzr, with no separate zero
; materialization.
278define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
279; CHECK-LABEL: test_insert_v8i16_i16_zero:
280; CHECK:       // %bb.0:
281; CHECK-NEXT:    mov.h v0[5], wzr
282; CHECK-NEXT:    ret
283  %v.0 = insertelement <8 x i16> %a, i16 0, i32 5
284  ret <8 x i16> %v.0
285}
286
287; TODO: This should just be a mov.s v0[3], wzr
; Inserts half 0.0 into lane 0 of a 64-bit <4 x half> argument. The expected
; assembly is one halfword lane move from wzr, bracketed by register-kill
; annotations for the d0/q0 widening.
; NOTE(review): the TODO preceding this function names "mov.s v0[3]", but
; this test writes lane 0 of a half vector and the expected output is already
; a single mov.h from wzr -- the TODO may be stale or misplaced; confirm.
288define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
289; CHECK-LABEL: test_insert_v4f16_f16_zero:
290; CHECK:       // %bb.0:
291; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
292; CHECK-NEXT:    mov.h v0[0], wzr
293; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
294; CHECK-NEXT:    ret
295  %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
296  ret <4 x half> %v.0
297}
298
; Inserts half 0.0 into lane 6 of a 128-bit <8 x half> argument. The expected
; assembly is a single halfword lane move from wzr.
299define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
300; CHECK-LABEL: test_insert_v8f16_f16_zero:
301; CHECK:       // %bb.0:
302; CHECK-NEXT:    mov.h v0[6], wzr
303; CHECK-NEXT:    ret
304  %v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
305  ret <8 x half> %v.0
306}
307
; Inserts float 0.0 into lane 0 of a 64-bit <2 x float> argument. The expected
; assembly is one word lane move from wzr, bracketed by register-kill
; annotations for the d0/q0 widening.
308define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
309; CHECK-LABEL: test_insert_v2f32_f32_zero:
310; CHECK:       // %bb.0:
311; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
312; CHECK-NEXT:    mov.s v0[0], wzr
313; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
314; CHECK-NEXT:    ret
315  %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
316  ret <2 x float> %v.0
317}
318
; Inserts float 0.0 into lane 3 of a 128-bit <4 x float> argument. The
; expected assembly is a single word lane move from wzr.
319define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
320; CHECK-LABEL: test_insert_v4f32_f32_zero:
321; CHECK:       // %bb.0:
322; CHECK-NEXT:    mov.s v0[3], wzr
323; CHECK-NEXT:    ret
324  %v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
325  ret <4 x float> %v.0
326}
327
; Inserts double 0.0 into lane 1 of a <2 x double> argument. The expected
; assembly is a single doubleword lane move from xzr (the 64-bit zero
; register).
328define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
329; CHECK-LABEL: test_insert_v2f64_f64_zero:
330; CHECK:       // %bb.0:
331; CHECK-NEXT:    mov.d v0[1], xzr
332; CHECK-NEXT:    ret
333  %v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
334  ret <2 x double> %v.0
335}
336