; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

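; These tests exercise lowering of the llvm.vector.insert intrinsic on RISC-V:
; fixed-length subvectors inserted into scalable and fixed-length destination
; vectors at various indices, with the fixed-length LMUL capped at 2
; (LMULMAX2) or 1 (LMULMAX1).
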
define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 0
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 2
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 6
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
  ret <vscale x 8 x i32> %v
}

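; When the fixed subvector is wider than the LMUL cap, LMULMAX1 loads it in
; 128-bit pieces and performs one tail-undisturbed vslideup per piece.
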
define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v12, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v12, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v12, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v16, 4
; LMULMAX1-NEXT:    ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v12, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v12, 8
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
; LMULMAX1-NEXT:    vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v16, 12
; LMULMAX1-NEXT:    ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
  ret <vscale x 8 x i32> %v
}

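; Inserting at index 0 of an undef destination leaves nothing to preserve, so
; the insert folds to just the subvector load.
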
define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(<2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_undef_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

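; Inserting one fixed vector into another lowers to loads of both operands, a
; tail-undisturbed vslideup at the insertion index, and a store of the result.
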
define void @insert_v4i32_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v4i32_v2i32_2(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 2)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_undef_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

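; For a v8i32 destination, LMULMAX1 only needs to touch the 128-bit half that
; is actually written; insert_v8i32_v2i32_6 below loads, updates, and stores
; just the upper half at offset 16.
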
define void @insert_v8i32_v2i32_0(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 0
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_2(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_2:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 2
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_2:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_6:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_6:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

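; With an undef destination and a non-zero index, the destination load is
; omitted; under LMULMAX1 only the 128-bit half that is defined gets stored.
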
define void @insert_v8i32_undef_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

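; The same pattern with 16-bit elements, using fractional LMUL (mf2/mf4) for
; the 64- and 32-bit operands.
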
define void @insert_v4i16_v2i16_0(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 0)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}

define void @insert_v4i16_v2i16_2(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 2)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}

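; Mask (i1) subvector inserts at byte-aligned bit offsets operate directly on
; the mask register bytes with an e8 vslideup.
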
define void @insert_v32i1_v8i1_0(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vlm.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT:    vlm.v v9, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vsm.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vlm.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT:    vlm.v v9, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vsm.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vlm.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT:    vlm.v v9, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 3, e8, mf4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vsm.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a0, a0, 2
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vlm.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT:    vlm.v v9, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vsm.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

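; A v4i1 insert is not byte-aligned, so the masks are first expanded to i8
; vectors with vmerge.vim, spliced with vslideup, and compressed back to a
; mask with vmsne.vi.
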
define void @insert_v8i1_v4i1_0(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmsne.vi v8, v9, 0
; CHECK-NEXT:    vsm.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 0)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}

define void @insert_v8i1_v4i1_4(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 4
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vmsne.vi v8, v9, 0
; CHECK-NEXT:    vsm.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 4)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}

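; Inserting a fixed subvector into a scalable vector is again a
; tail-undisturbed vslideup; the result stays in a register, so there is no
; store.
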
define <vscale x 2 x i16> @insert_nxv2i16_v2i16_0(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 0
; CHECK-NEXT:    ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 0)
  ret <vscale x 2 x i16> %c
}

define <vscale x 2 x i16> @insert_nxv2i16_v2i16_4(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 6, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 4)
  ret <vscale x 2 x i16> %c
}

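; Scalable i1 destinations follow the same rules as fixed ones: sub-byte
; inserts go through the i8 expand/slide/compress sequence, while byte-aligned
; inserts slide the mask register (v0) bytes directly.
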
define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
; CHECK-LABEL: insert_nxv2i1_v4i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    ret
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 0)
  ret <vscale x 2 x i1> %c
}

define <vscale x 8 x i1> @insert_nxv8i1_v8i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v8i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; CHECK-NEXT:    vslideup.vi v0, v8, 0
; CHECK-NEXT:    ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 0)
  ret <vscale x 8 x i1> %c
}

define <vscale x 8 x i1> @insert_nxv8i1_v8i1_16(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v8i1_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 3, e8, mf8, tu, mu
; CHECK-NEXT:    vslideup.vi v0, v8, 2
; CHECK-NEXT:    ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 16)
  ret <vscale x 8 x i1> %c
}

declare <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)

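; nxv16i64 spans two LMUL=8 register groups, so an insert must be resolved at
; compile time to one of the two halves; otherwise it has to go through the
; stack, as in insert_v2i64_nxv16i64_hi below.
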
define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    vsetivli zero, 6, e64, m8, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v16, 4
; CHECK-NEXT:    vs8r.v v8, (a2)
; CHECK-NEXT:    ret
  %sv0 = load <2 x i64>, <2 x i64>* %psv0
  %sv1 = load <2 x i64>, <2 x i64>* %psv1
  %v0 = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vs8r.v v8, (a1)
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, mu
; CHECK-NEXT:    vslideup.vi v16, v8, 2
; CHECK-NEXT:    vs8r.v v16, (a1)
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

; Check that we don't mistakenly optimize this: since vscale is unknown at
; compile time, we can't tell whether the subvector lands in the low or the
; high half of the split vector.
define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -64
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    addi s0, sp, 64
; CHECK-NEXT:    .cfi_def_cfa s0, 0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 128
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    addi a2, sp, 64
; CHECK-NEXT:    add a3, a2, a0
; CHECK-NEXT:    vl8re64.v v8, (a3)
; CHECK-NEXT:    vl8re64.v v16, (a2)
; CHECK-NEXT:    add a0, a1, a0
; CHECK-NEXT:    vs8r.v v8, (a0)
; CHECK-NEXT:    vs8r.v v16, (a1)
; CHECK-NEXT:    addi sp, s0, -64
; CHECK-NEXT:    addi sp, sp, 64
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

declare <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)

declare <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16>, <2 x i16>, i64)

declare <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)
declare <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1>, <4 x i1>, i64)
declare <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1>, <8 x i1>, i64)

declare <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>, <2 x i16>, i64)

declare <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)