1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3
; Insert the constant i8 30 into lane 0 of %a.
; The checks expect a one-lane predicate (ptrue ..., vl1) and a merging
; predicated mov of the scalar held in w8.
4define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
5; CHECK-LABEL: test_lane0_16xi8:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    mov w8, #30
8; CHECK-NEXT:    ptrue p0.b, vl1
9; CHECK-NEXT:    mov z0.b, p0/m, w8
10; CHECK-NEXT:    ret
11  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
12  ret <vscale x 16 x i8> %b
13}
14
; Insert the constant i16 30 into lane 0 of %a.
; Same expected shape as the byte case, using .h element size.
15define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
16; CHECK-LABEL: test_lane0_8xi16:
17; CHECK:       // %bb.0:
18; CHECK-NEXT:    mov w8, #30
19; CHECK-NEXT:    ptrue p0.h, vl1
20; CHECK-NEXT:    mov z0.h, p0/m, w8
21; CHECK-NEXT:    ret
22  %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
23  ret <vscale x 8 x i16> %b
24}
25
; Insert the constant i32 30 into lane 0 of %a.
; The checks expect a vl1 predicate at .s element size and a merging mov
; from w8.
26define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
27; CHECK-LABEL: test_lane0_4xi32:
28; CHECK:       // %bb.0:
29; CHECK-NEXT:    mov w8, #30
30; CHECK-NEXT:    ptrue p0.s, vl1
31; CHECK-NEXT:    mov z0.s, p0/m, w8
32; CHECK-NEXT:    ret
33  %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
34  ret <vscale x 4 x i32> %b
35}
36
; Insert the constant i64 30 into lane 0 of %a.
; The constant is materialised through w8 and the merging mov reads the
; 64-bit view x8.
37define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
38; CHECK-LABEL: test_lane0_2xi64:
39; CHECK:       // %bb.0:
40; CHECK-NEXT:    mov w8, #30
41; CHECK-NEXT:    ptrue p0.d, vl1
42; CHECK-NEXT:    mov z0.d, p0/m, x8
43; CHECK-NEXT:    ret
44  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
45  ret <vscale x 2 x i64> %b
46}
47
; Insert the constant double 1.0 into lane 0 of %a.
; The checks expect the constant in d1 (fmov) and a vl1-predicated merging
; mov from z1 into z0.
48define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
49; CHECK-LABEL: test_lane0_2xf64:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    fmov d1, #1.00000000
52; CHECK-NEXT:    ptrue p0.d, vl1
53; CHECK-NEXT:    mov z0.d, p0/m, z1.d
54; CHECK-NEXT:    ret
55  %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
56  ret <vscale x 2 x double> %b
57}
58
; Insert the constant float 1.0 into lane 0 of %a.
; Same expected shape as the double case, using s1 and .s element size.
59define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
60; CHECK-LABEL: test_lane0_4xf32:
61; CHECK:       // %bb.0:
62; CHECK-NEXT:    fmov s1, #1.00000000
63; CHECK-NEXT:    ptrue p0.s, vl1
64; CHECK-NEXT:    mov z0.s, p0/m, z1.s
65; CHECK-NEXT:    ret
66  %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
67  ret <vscale x 4 x float> %b
68}
69
; Insert the constant half 1.0 into lane 0 of %a.
; Same expected shape as the double case, using h1 and .h element size.
70define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
71; CHECK-LABEL: test_lane0_8xf16:
72; CHECK:       // %bb.0:
73; CHECK-NEXT:    fmov h1, #1.00000000
74; CHECK-NEXT:    ptrue p0.h, vl1
75; CHECK-NEXT:    mov z0.h, p0/m, z1.h
76; CHECK-NEXT:    ret
77  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
78  ret <vscale x 8 x half> %b
79}
80
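81; Insert at a lane index beyond the minimum (vscale = 1) vector length; the result is poison if the runtime vector is shorter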
; Insert the constant i64 30 into lane 4 of %a; the type only guarantees two
; lanes.
; The checks expect the lane to be selected by comparing an index vector
; (0, 1, 2, ...) against a broadcast of the lane number, then a merging
; predicated mov of the scalar.
82define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
83; CHECK-LABEL: test_lane4_2xi64:
84; CHECK:       // %bb.0:
85; CHECK-NEXT:    mov w8, #4
86; CHECK-NEXT:    mov w9, #30
87; CHECK-NEXT:    index z2.d, #0, #1
88; CHECK-NEXT:    ptrue p0.d
89; CHECK-NEXT:    mov z1.d, x8
90; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z1.d
91; CHECK-NEXT:    mov z0.d, p0/m, x9
92; CHECK-NEXT:    ret
93  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
94  ret <vscale x 2 x i64> %b
95}
96
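97; Insert at a lane index beyond the minimum (vscale = 1) vector length; the result is poison if the runtime vector is shorter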
; Insert the constant half 1.0 into lane 9 of %a; the type only guarantees
; eight lanes.
; The checks expect the index/broadcast/cmpeq lane mask followed by a merging
; predicated mov from the FP scalar register h1.
98define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
99; CHECK-LABEL: test_lane9_8xf16:
100; CHECK:       // %bb.0:
101; CHECK-NEXT:    mov w8, #9
102; CHECK-NEXT:    fmov h1, #1.00000000
103; CHECK-NEXT:    index z3.h, #0, #1
104; CHECK-NEXT:    ptrue p0.h
105; CHECK-NEXT:    mov z2.h, w8
106; CHECK-NEXT:    cmpeq p0.h, p0/z, z3.h, z2.h
107; CHECK-NEXT:    mov z0.h, p0/m, h1
108; CHECK-NEXT:    ret
109  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
110  ret <vscale x 8 x half> %b
111}
112
; Insert the constant i8 30 into lane 1 of %a.
; Unlike lane 0, the checks expect the general index/broadcast/cmpeq lane
; mask here rather than a vl1 predicate.
113define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
114; CHECK-LABEL: test_lane1_16xi8:
115; CHECK:       // %bb.0:
116; CHECK-NEXT:    mov w8, #1
117; CHECK-NEXT:    mov w9, #30
118; CHECK-NEXT:    index z2.b, #0, #1
119; CHECK-NEXT:    ptrue p0.b
120; CHECK-NEXT:    mov z1.b, w8
121; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z1.b
122; CHECK-NEXT:    mov z0.b, p0/m, w9
123; CHECK-NEXT:    ret
124  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
125  ret <vscale x 16 x i8> %b
126}
127
; Insert the constant i8 30 into %a at the run-time lane index %x.
; The checks expect the i32 index to be sign-extended (sxtw), broadcast, and
; compared against an index vector to form the lane mask for the merging mov.
128define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
129; CHECK-LABEL: test_lanex_16xi8:
130; CHECK:       // %bb.0:
131; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
132; CHECK-NEXT:    sxtw x8, w0
133; CHECK-NEXT:    mov w9, #30
134; CHECK-NEXT:    index z2.b, #0, #1
135; CHECK-NEXT:    ptrue p0.b
136; CHECK-NEXT:    mov z1.b, w8
137; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z1.b
138; CHECK-NEXT:    mov z0.b, p0/m, w9
139; CHECK-NEXT:    ret
140  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
141  ret <vscale x 16 x i8> %b
142}
143
144
145; Redundant lane insert
; Extract lane 2 of %a and insert it straight back into lane 2 of the same
; vector.
; The checks expect the pair to fold away, leaving only ret.
146define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
147; CHECK-LABEL: extract_insert_4xi32:
148; CHECK:       // %bb.0:
149; CHECK-NEXT:    ret
150  %b = extractelement <vscale x 4 x i32> %a, i32 2
151  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
152  ret <vscale x 4 x i32> %c
153}
154
; Insert the scalar argument %a into lane 6 of an undef vector.
; The checks expect the index/broadcast/cmpeq lane mask and a merging mov
; directly from the argument register w0.
155define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
156; CHECK-LABEL: test_lane6_undef_8xi16:
157; CHECK:       // %bb.0:
158; CHECK-NEXT:    mov w8, #6
159; CHECK-NEXT:    index z1.h, #0, #1
160; CHECK-NEXT:    ptrue p0.h
161; CHECK-NEXT:    mov z0.h, w8
162; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z0.h
163; CHECK-NEXT:    mov z0.h, p0/m, w0
164; CHECK-NEXT:    ret
165  %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
166  ret <vscale x 8 x i16> %b
167}
168
; Insert the scalar argument %a into lane 0 of an undef vector.
; The checks expect a single register move (fmov s0, w0) and no predicate.
169define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
170; CHECK-LABEL: test_lane0_undef_16xi8:
171; CHECK:       // %bb.0:
172; CHECK-NEXT:    fmov s0, w0
173; CHECK-NEXT:    ret
174  %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
175  ret <vscale x 16 x i8> %b
176}
177
; Copy lane 0 of %b into lane 0 of %a.
; The checks expect the element to be read with fmov w8, s1 and written with
; a vl1-predicated merging mov.
178define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
179; CHECK-LABEL: test_insert0_of_extract0_16xi8:
180; CHECK:       // %bb.0:
181; CHECK-NEXT:    fmov w8, s1
182; CHECK-NEXT:    ptrue p0.b, vl1
183; CHECK-NEXT:    mov z0.b, p0/m, w8
184; CHECK-NEXT:    ret
185  %c = extractelement <vscale x 16 x i8> %b, i32 0
186  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
187  ret <vscale x 16 x i8> %d
188}
189
; Copy lane 64 of %b into lane 64 of %a; the type only guarantees 16 lanes.
; The checks expect the extract to go through whilels + lastb and the insert
; through the index/broadcast/cmpeq lane mask, with the lane number in w8
; shared by both.
190define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
191; CHECK-LABEL: test_insert64_of_extract64_16xi8:
192; CHECK:       // %bb.0:
193; CHECK-NEXT:    mov w8, #64
194; CHECK-NEXT:    index z3.b, #0, #1
195; CHECK-NEXT:    ptrue p1.b
196; CHECK-NEXT:    whilels p0.b, xzr, x8
197; CHECK-NEXT:    mov z2.b, w8
198; CHECK-NEXT:    lastb w8, p0, z1.b
199; CHECK-NEXT:    cmpeq p0.b, p1/z, z3.b, z2.b
200; CHECK-NEXT:    mov z0.b, p0/m, w8
201; CHECK-NEXT:    ret
202  %c = extractelement <vscale x 16 x i8> %b, i32 64
203  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
204  ret <vscale x 16 x i8> %d
205}
206
; Copy lane 1 of %b into lane 3 of %a.
; The checks expect the extract as umov from v1.b[1] and the insert through
; the index/broadcast/cmpeq lane mask.
207define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
208; CHECK-LABEL: test_insert3_of_extract1_16xi8:
209; CHECK:       // %bb.0:
210; CHECK-NEXT:    mov w8, #3
211; CHECK-NEXT:    umov w9, v1.b[1]
212; CHECK-NEXT:    index z2.b, #0, #1
213; CHECK-NEXT:    ptrue p0.b
214; CHECK-NEXT:    mov z1.b, w8
215; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z1.b
216; CHECK-NEXT:    mov z0.b, p0/m, w9
217; CHECK-NEXT:    ret
218  %c = extractelement <vscale x 16 x i8> %b, i32 1
219  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
220  ret <vscale x 16 x i8> %d
221}
222
; Insert the half argument %a into lane 0 of an undef <vscale x 8 x half>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining h0 as z0.
223define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
224; CHECK-LABEL: test_insert_into_undef_nxv8f16:
225; CHECK:       // %bb.0:
226; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
227; CHECK-NEXT:    ret
228  %b = insertelement <vscale x 8 x half> undef, half %a, i32 0
229  ret <vscale x 8 x half> %b
230}
231
; Insert the half argument %a into lane 0 of an undef <vscale x 4 x half>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining h0 as z0.
232define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
233; CHECK-LABEL: test_insert_into_undef_nxv4f16:
234; CHECK:       // %bb.0:
235; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
236; CHECK-NEXT:    ret
237  %b = insertelement <vscale x 4 x half> undef, half %a, i32 0
238  ret <vscale x 4 x half> %b
239}
240
; Insert the half argument %a into lane 0 of an undef <vscale x 2 x half>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining h0 as z0.
241define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
242; CHECK-LABEL: test_insert_into_undef_nxv2f16:
243; CHECK:       // %bb.0:
244; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
245; CHECK-NEXT:    ret
246  %b = insertelement <vscale x 2 x half> undef, half %a, i32 0
247  ret <vscale x 2 x half> %b
248}
249
; Insert the float argument %a into lane 0 of an undef <vscale x 4 x float>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining s0 as z0.
250define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
251; CHECK-LABEL: test_insert_into_undef_nxv4f32:
252; CHECK:       // %bb.0:
253; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
254; CHECK-NEXT:    ret
255  %b = insertelement <vscale x 4 x float> undef, float %a, i32 0
256  ret <vscale x 4 x float> %b
257}
258
; Insert the float argument %a into lane 0 of an undef <vscale x 2 x float>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining s0 as z0.
259define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
260; CHECK-LABEL: test_insert_into_undef_nxv2f32:
261; CHECK:       // %bb.0:
262; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
263; CHECK-NEXT:    ret
264  %b = insertelement <vscale x 2 x float> undef, float %a, i32 0
265  ret <vscale x 2 x float> %b
266}
267
; Insert the double argument %a into lane 0 of an undef <vscale x 2 x double>.
; The checks expect no instruction besides ret: only a register-kill note
; redefining d0 as z0.
268define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
269; CHECK-LABEL: test_insert_into_undef_nxv2f64:
270; CHECK:       // %bb.0:
271; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
272; CHECK-NEXT:    ret
273  %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
274  ret <vscale x 2 x double> %b
275}
276
277; Insert scalar at index
; Insert half %h into an undef <vscale x 2 x half> at run-time index %idx.
; The checks expect the lane mask to be computed at .d element size (index
; vector compared with a broadcast of x0) while the value is written with a
; .h merging mov from h0.
278define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
279; CHECK-LABEL: test_insert_with_index_nxv2f16:
280; CHECK:       // %bb.0:
281; CHECK-NEXT:    index z1.d, #0, #1
282; CHECK-NEXT:    mov z2.d, x0
283; CHECK-NEXT:    ptrue p0.d
284; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
285; CHECK-NEXT:    mov z0.h, p0/m, h0
286; CHECK-NEXT:    ret
287  %res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
288  ret <vscale x 2 x half> %res
289}
290
; Insert half %h into an undef <vscale x 4 x half> at run-time index %idx.
; The checks expect the lane mask to be computed at .s element size (using
; w0) while the value is written with a .h merging mov from h0.
291define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
292; CHECK-LABEL: test_insert_with_index_nxv4f16:
293; CHECK:       // %bb.0:
294; CHECK-NEXT:    index z1.s, #0, #1
295; CHECK-NEXT:    mov z2.s, w0
296; CHECK-NEXT:    ptrue p0.s
297; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
298; CHECK-NEXT:    mov z0.h, p0/m, h0
299; CHECK-NEXT:    ret
300  %res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
301  ret <vscale x 4 x half> %res
302}
303
; Insert half %h into an undef <vscale x 8 x half> at run-time index %idx.
; The checks expect both the lane mask and the merging mov at .h element size.
304define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
305; CHECK-LABEL: test_insert_with_index_nxv8f16:
306; CHECK:       // %bb.0:
307; CHECK-NEXT:    index z1.h, #0, #1
308; CHECK-NEXT:    mov z2.h, w0
309; CHECK-NEXT:    ptrue p0.h
310; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
311; CHECK-NEXT:    mov z0.h, p0/m, h0
312; CHECK-NEXT:    ret
313  %res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
314  ret <vscale x 8 x half> %res
315}
316
; Insert float %f into an undef <vscale x 2 x float> at run-time index %idx.
; The checks expect the lane mask to be computed at .d element size while the
; value is written with a .s merging mov from s0.
317define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
318; CHECK-LABEL: test_insert_with_index_nxv2f32:
319; CHECK:       // %bb.0:
320; CHECK-NEXT:    index z1.d, #0, #1
321; CHECK-NEXT:    mov z2.d, x0
322; CHECK-NEXT:    ptrue p0.d
323; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
324; CHECK-NEXT:    mov z0.s, p0/m, s0
325; CHECK-NEXT:    ret
326  %res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
327  ret <vscale x 2 x float> %res
328}
329
; Insert float %f into an undef <vscale x 4 x float> at run-time index %idx.
; The checks expect both the lane mask and the merging mov at .s element size.
330define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
331; CHECK-LABEL: test_insert_with_index_nxv4f32:
332; CHECK:       // %bb.0:
333; CHECK-NEXT:    index z1.s, #0, #1
334; CHECK-NEXT:    mov z2.s, w0
335; CHECK-NEXT:    ptrue p0.s
336; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
337; CHECK-NEXT:    mov z0.s, p0/m, s0
338; CHECK-NEXT:    ret
339  %res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
340  ret <vscale x 4 x float> %res
341}
342
; Insert double %d into an undef <vscale x 2 x double> at run-time index %idx.
; The checks expect both the lane mask and the merging mov at .d element size.
343define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
344; CHECK-LABEL: test_insert_with_index_nxv2f64:
345; CHECK:       // %bb.0:
346; CHECK-NEXT:    index z1.d, #0, #1
347; CHECK-NEXT:    mov z2.d, x0
348; CHECK-NEXT:    ptrue p0.d
349; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
350; CHECK-NEXT:    mov z0.d, p0/m, d0
351; CHECK-NEXT:    ret
352  %res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
353  ret <vscale x 2 x double> %res
354}
355
356; Predicate insert
; Insert the run-time i1 %elt into lane 0 (constant index) of predicate %val.
; The checks expect the predicate to be expanded to a 0/1 integer vector
; (mov ..., p0/z, #1), the element merged in under a vl1 predicate, and the
; result masked with #0x1 and turned back into a predicate with cmpne.
357define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
358; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
359; CHECK:       // %bb.0:
360; CHECK-NEXT:    ptrue p1.d, vl1
361; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
362; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
363; CHECK-NEXT:    mov z0.d, p1/m, x0
364; CHECK-NEXT:    ptrue p0.d
365; CHECK-NEXT:    and z0.d, z0.d, #0x1
366; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
367; CHECK-NEXT:    ret
368  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
369  ret <vscale x 2 x i1> %res
370}
371
; Insert the run-time i1 %elt into lane 2 (constant index) of predicate %val.
; The checks expect the predicate expanded to a 0/1 integer vector, the lane
; selected with the index/broadcast/cmpeq mask, and the result masked with
; #0x1 and turned back into a predicate with cmpne.
372define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
373; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
374; CHECK:       // %bb.0:
375; CHECK-NEXT:    mov w8, #2
376; CHECK-NEXT:    index z1.s, #0, #1
377; CHECK-NEXT:    ptrue p1.s
378; CHECK-NEXT:    mov z0.s, w8
379; CHECK-NEXT:    cmpeq p2.s, p1/z, z1.s, z0.s
380; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
381; CHECK-NEXT:    mov z0.s, p2/m, w0
382; CHECK-NEXT:    and z0.s, z0.s, #0x1
383; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
384; CHECK-NEXT:    ret
385  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
386  ret <vscale x 4 x i1> %res
387}
388
; Insert the constant i1 1 into predicate %val at the run-time index %idx
; (here the immediate is the element, not the index).
; The checks expect the i32 index to be sign-extended (sxtw), the lane
; selected with the index/broadcast/cmpeq mask, and the 0/1 integer vector
; turned back into a predicate with and + cmpne.
389define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
390; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
391; CHECK:       // %bb.0:
392; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
393; CHECK-NEXT:    sxtw x8, w0
394; CHECK-NEXT:    mov w9, #1
395; CHECK-NEXT:    index z1.h, #0, #1
396; CHECK-NEXT:    ptrue p1.h
397; CHECK-NEXT:    mov z0.h, w8
398; CHECK-NEXT:    cmpeq p2.h, p1/z, z1.h, z0.h
399; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
400; CHECK-NEXT:    mov z0.h, p2/m, w9
401; CHECK-NEXT:    and z0.h, z0.h, #0x1
402; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
403; CHECK-NEXT:    ret
404  %res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
405  ret <vscale x 8 x i1> %res
406}
407
; Insert the constant i1 0 into lane 4 of predicate %val (element and index
; are both constants).
; The checks expect the zero to be taken from wzr and merged into the 0/1
; integer vector under the index/broadcast/cmpeq lane mask, followed by
; and + cmpne to rebuild the predicate.
408define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
409; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
410; CHECK:       // %bb.0:
411; CHECK-NEXT:    mov w9, #4
412; CHECK-NEXT:    mov w8, wzr
413; CHECK-NEXT:    index z1.b, #0, #1
414; CHECK-NEXT:    ptrue p1.b
415; CHECK-NEXT:    mov z0.b, w9
416; CHECK-NEXT:    cmpeq p2.b, p1/z, z1.b, z0.b
417; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
418; CHECK-NEXT:    mov z0.b, p2/m, w8
419; CHECK-NEXT:    and z0.b, z0.b, #0x1
420; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
421; CHECK-NEXT:    ret
422  %res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
423  ret <vscale x 16 x i1> %res
424}
425
426
; Insert the run-time i1 %elt into predicate %val at the run-time index %idx.
; The checks expect the index (w1) to be sign-extended and broadcast for the
; lane mask, the element (x0) merged into the 0/1 integer vector, and
; and + cmpne to rebuild the predicate, all at .d element size.
427define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
428; CHECK-LABEL: test_predicate_insert_2xi1:
429; CHECK:       // %bb.0:
430; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
431; CHECK-NEXT:    sxtw x8, w1
432; CHECK-NEXT:    index z1.d, #0, #1
433; CHECK-NEXT:    ptrue p1.d
434; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
435; CHECK-NEXT:    mov z0.d, x8
436; CHECK-NEXT:    cmpeq p2.d, p1/z, z1.d, z0.d
437; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
438; CHECK-NEXT:    mov z0.d, p2/m, x0
439; CHECK-NEXT:    and z0.d, z0.d, #0x1
440; CHECK-NEXT:    cmpne p0.d, p1/z, z0.d, #0
441; CHECK-NEXT:    ret
442  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
443  ret <vscale x 2 x i1> %res
444}
445
; Insert the run-time i1 %elt into predicate %val at the run-time index %idx.
; Same expected sequence as the 2 x i1 variant, at .s element size with the
; element taken from w0.
446define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
447; CHECK-LABEL: test_predicate_insert_4xi1:
448; CHECK:       // %bb.0:
449; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
450; CHECK-NEXT:    sxtw x8, w1
451; CHECK-NEXT:    index z1.s, #0, #1
452; CHECK-NEXT:    ptrue p1.s
453; CHECK-NEXT:    mov z0.s, w8
454; CHECK-NEXT:    cmpeq p2.s, p1/z, z1.s, z0.s
455; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
456; CHECK-NEXT:    mov z0.s, p2/m, w0
457; CHECK-NEXT:    and z0.s, z0.s, #0x1
458; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
459; CHECK-NEXT:    ret
460  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
461  ret <vscale x 4 x i1> %res
462}
; Insert the run-time i1 %elt into predicate %val at the run-time index %idx.
; Same expected sequence as the 2 x i1 variant, at .h element size with the
; element taken from w0.
463define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
464; CHECK-LABEL: test_predicate_insert_8xi1:
465; CHECK:       // %bb.0:
466; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
467; CHECK-NEXT:    sxtw x8, w1
468; CHECK-NEXT:    index z1.h, #0, #1
469; CHECK-NEXT:    ptrue p1.h
470; CHECK-NEXT:    mov z0.h, w8
471; CHECK-NEXT:    cmpeq p2.h, p1/z, z1.h, z0.h
472; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
473; CHECK-NEXT:    mov z0.h, p2/m, w0
474; CHECK-NEXT:    and z0.h, z0.h, #0x1
475; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
476; CHECK-NEXT:    ret
477  %res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
478  ret <vscale x 8 x i1> %res
479}
480
; Insert the run-time i1 %elt into predicate %val at the run-time index %idx.
; Same expected sequence as the 2 x i1 variant, at .b element size with the
; element taken from w0.
481define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
482; CHECK-LABEL: test_predicate_insert_16xi1:
483; CHECK:       // %bb.0:
484; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
485; CHECK-NEXT:    sxtw x8, w1
486; CHECK-NEXT:    index z1.b, #0, #1
487; CHECK-NEXT:    ptrue p1.b
488; CHECK-NEXT:    mov z0.b, w8
489; CHECK-NEXT:    cmpeq p2.b, p1/z, z1.b, z0.b
490; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
491; CHECK-NEXT:    mov z0.b, p2/m, w0
492; CHECK-NEXT:    and z0.b, z0.b, #0x1
493; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
494; CHECK-NEXT:    ret
495  %res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
496  ret <vscale x 16 x i1> %res
497}
498
; Insert the run-time i1 %elt into a <vscale x 32 x i1> predicate at the
; run-time index %idx; the value arrives and returns in two predicate
; registers (p0 and p1). The function is marked uwtable, so the checks also
; pin the CFI directives.
; The checks expect a stack-based lowering: both halves are expanded to 0/1
; byte vectors and stored to a two-vector stack slot, the index is clamped to
; the last element (mov #-1 / addvl #2, then cmp + csel lo), the element is
; written with strb, and both halves are reloaded, masked with #0x1 and
; turned back into predicates with cmpne.
499define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
500; CHECK-LABEL: test_predicate_insert_32xi1:
501; CHECK:       // %bb.0:
502; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
503; CHECK-NEXT:    .cfi_def_cfa_offset 16
504; CHECK-NEXT:    .cfi_offset w29, -16
505; CHECK-NEXT:    addvl sp, sp, #-2
506; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
507; CHECK-NEXT:    mov x8, #-1
508; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
509; CHECK-NEXT:    sxtw x9, w1
510; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
511; CHECK-NEXT:    ptrue p1.b
512; CHECK-NEXT:    st1b { z0.b }, p1, [sp, #1, mul vl]
513; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
514; CHECK-NEXT:    addvl x8, x8, #2
515; CHECK-NEXT:    st1b { z0.b }, p1, [sp]
516; CHECK-NEXT:    cmp x9, x8
517; CHECK-NEXT:    csel x8, x9, x8, lo
518; CHECK-NEXT:    mov x9, sp
519; CHECK-NEXT:    strb w0, [x9, x8]
520; CHECK-NEXT:    ld1b { z0.b }, p1/z, [sp]
521; CHECK-NEXT:    ld1b { z1.b }, p1/z, [sp, #1, mul vl]
522; CHECK-NEXT:    and z0.b, z0.b, #0x1
523; CHECK-NEXT:    and z1.b, z1.b, #0x1
524; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
525; CHECK-NEXT:    cmpne p1.b, p1/z, z1.b, #0
526; CHECK-NEXT:    addvl sp, sp, #2
527; CHECK-NEXT:    .cfi_def_cfa wsp, 16
528; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
529; CHECK-NEXT:    .cfi_def_cfa_offset 0
530; CHECK-NEXT:    .cfi_restore w29
531; CHECK-NEXT:    ret
532  %res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
533  ret <vscale x 32 x i1> %res
534}
535