1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
3; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
4; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
5; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
6
declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, <vscale x 1 x i1>, i32)

; Masked vp.scatter of <vscale x 1 x i8>: selects a single indexed-unordered
; store (vsoxei32 on RV32, vsoxei64 on RV64) with the EVL in a0 feeding vsetvli.
define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i32)

; Same as above at nxv2: on RV64 the 64-bit pointer vector needs a wider
; register group, so the index operand moves to v10.
define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i16->i8 feeding the scatter: a single vncvt.x.x.w narrowing step is
; emitted before the indexed store.
define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i32->i8: narrowing is done in two vncvt steps (i32->i16->i8), since
; each vncvt halves SEW once.
define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i64->i8: three vncvt narrowing steps (i64->i32->i16->i8) before the
; scatter.
define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, <vscale x 4 x i1>, i32)

; nxv4 masked scatter: pointer vector occupies v10 (RV32) / v12 (RV64).
define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; All-true (splat i1 1) mask: the mask operand is dropped and an unmasked
; vsoxei (no v0.t) is emitted.
define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, <vscale x 8 x i1>, i32)

; nxv8 masked scatter: pointers now need m4 (v12) on RV32 and m8 (v16) on RV64.
define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + i8 index GEP: the i8 indices are sign-extended to pointer width
; (vsext.vf4 on RV32, vsext.vf8 on RV64) and used directly as byte offsets
; (element size 1, so no scaling).
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
192
declare void @llvm.vp.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, <vscale x 1 x i1>, i32)

; Masked vp.scatter of <vscale x 1 x i16>: single indexed store at e16/mf4.
define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, <vscale x 2 x i1>, i32)

; nxv2 i16 scatter: RV64's 64-bit pointer vector shifts the index to v10.
define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i32->i16 feeding the scatter: one vncvt narrowing step before the
; indexed store.
define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i64->i16: two vncvt narrowing steps (i64->i32->i16) before the scatter.
define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, <vscale x 4 x i1>, i32)

; nxv4 i16 masked scatter: pointers in v10 (RV32) / v12 (RV64).
define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; All-true mask: unmasked vsoxei (no v0.t operand).
define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, <vscale x 8 x i1>, i32)

; nxv8 i16 masked scatter: pointers need m4 (v12) on RV32 / m8 (v16) on RV64.
define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + i8 index GEP into i16 elements: indices are sign-extended to pointer
; width, then doubled (vadd.vv x, x = x*2, the i16 element size) to form byte
; offsets for the indexed store.
define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit sext of i8 indices before the GEP: folds to the same code as the
; implicit-extension case above.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit zext of i8 indices: same shape but with vzext instead of vsext.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + native i16 index GEP: indices sign-extended to pointer width
; (vsext.vf2/vf4) and doubled for the i16 element size.
define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
422
declare void @llvm.vp.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, <vscale x 1 x i1>, i32)

; Masked vp.scatter of <vscale x 1 x i32>: single indexed store at e32/mf2.
define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, <vscale x 2 x i1>, i32)

; nxv2 i32 scatter: RV64's pointer vector moves the index operand to v10.
define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; trunc i64->i32 feeding the scatter: one vncvt narrowing step; the narrowed
; value lands in a fresh register (v11/v12) and is stored from there.
define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, <vscale x 4 x i1>, i32)

; nxv4 i32 masked scatter: pointers in v10 (RV32) / v12 (RV64).
define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; All-true mask: unmasked vsoxei (no v0.t operand).
define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, <vscale x 8 x i1>, i32)

; nxv8 i32 masked scatter: pointers need m4 (v12) on RV32 / m8 (v16) on RV64.
define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + i8 index GEP into i32 elements: indices are sign-extended to pointer
; width, then shifted left by 2 (vsll.vi ..., 2 = *4, the i32 element size)
; to form byte offsets.
define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit sext of i8 indices before the GEP: folds to the same code as the
; implicit-extension case above.
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit zext of i8 indices: same shape but with vzext instead of vsext.
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + i16 index GEP into i32 elements: sign-extend (vf2 on RV32, vf4 on
; RV64) then scale by 4 via vsll.vi.
define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit sext of i16 indices: folds to the same code as the implicit case.
define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Explicit zext of i16 indices: same shape but with vzext instead of vsext.
define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; base + native i32 index GEP: on RV32 the indices are already pointer-width,
; so only the *4 scaling shift is needed; RV64 still sign-extends to 64 bits.
define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
697
698declare void @llvm.vp.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, <vscale x 1 x i1>, i32)
699
define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; Masked VP scatter of nxv1 x i64 through raw pointers: one vsetvli on %evl,
; then an indexed store whose index EEW matches the target pointer width.
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}
715
716declare void @llvm.vp.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, <vscale x 2 x i1>, i32)
717
define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; Same as the nxv1 case at LMUL=2: pointer vector lands in v10, data in v8.
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
733
734declare void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, <vscale x 4 x i1>, i32)
735
define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; LMUL=4 variant: data occupies v8-v11, pointer vector starts at v12.
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}
751
define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 zeroext %evl) {
; All-ones mask built via insertelement + splat shuffle: codegen should
; recognize it and drop the mask operand (no v0.t suffix on the store).
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}
769
770declare void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, <vscale x 8 x i1>, i32)
771
define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; LMUL=8 variant: data fills v8-v15, pointer vector fills v16-v23.
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
787
define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i8 indices into an i64 array: extend, then shift left by 3 (8-byte stride).
; riscv32 only needs a 32-bit offset vector (vsext.vf4); riscv64 extends to 64.
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
810
define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Explicit sext-to-i64 before the GEP: riscv32 performs the full i64 extend
; and shift, then narrows the offsets back to 32 bits (vncvt) because its
; indexed store takes 32-bit offsets.
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
836
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extend counterpart of the sext case above: same i64 widen/shift then
; narrow-to-32 sequence on riscv32, straight vzext.vf8 path on riscv64.
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
862
define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i16 indices into an i64 array: extend (vf2 to 32-bit on riscv32, vf4 to
; 64-bit on riscv64) and shift by 3 for the 8-byte element stride.
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
885
define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Explicit sext i16 -> i64 before the GEP: riscv32 widens/shifts at e64 and
; then truncates the offsets back to 32 bits with vncvt for vsoxei32.
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
911
define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extend counterpart of the preceding sext test (vzext.vf4 instead).
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
937
define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i32 indices into an i64 array: riscv32 can shift in place (indices already
; match offset width); riscv64 sign-extends to 64 bits first.
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
959
define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Explicit sext i32 -> i64 before the GEP: riscv32 widens, shifts at e64,
; then narrows the offsets back to 32 bits (vncvt) for vsoxei32.
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
985
define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extend counterpart of the preceding sext test (vzext.vf2 instead).
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1011
define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Native i64 indices: riscv64 just shifts; riscv32 shifts at e64 and then
; truncates the offsets to 32 bits since vsoxei32 takes 32-bit offsets.
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1034
1035declare void @llvm.vp.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, <vscale x 1 x i1>, i32)
1036
define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; FP (half) scatter through raw pointers; requires the zvfh feature from the
; RUN lines. Same vsoxei selection by pointer width as the integer tests.
; RV32-LABEL: vpscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}
1052
1053declare void @llvm.vp.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, <vscale x 2 x i1>, i32)
1054
define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; nxv2 half scatter: on riscv64 the pointer vector needs a wider register
; group than the data (v10 vs v9 on riscv32).
; RV32-LABEL: vpscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
1070
1071declare void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, <vscale x 4 x i1>, i32)
1072
define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; nxv4 half scatter at e16/m1; pointer vector at m2 (riscv32) or m4 (riscv64).
; RV32-LABEL: vpscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}
1088
define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 zeroext %evl) {
; All-true splat mask: the store should be emitted unmasked (no v0.t).
; RV32-LABEL: vpscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}
1106
1107declare void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, <vscale x 8 x i1>, i32)
1108
define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; nxv8 half scatter at e16/m2; pointers occupy m4 (riscv32) / m8 (riscv64).
; RV32-LABEL: vpscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1124
define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i8 indices into a half array: offsets are idxs*2, materialized as a
; self-add (vadd.vv v,v,v) rather than a shift by 1.
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1147
define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Explicit sext i8 -> i16 before the GEP; expected codegen matches the plain
; i8-index case above (extend folded into the offset computation).
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1171
define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extend counterpart of the sext test above (vzext.vf4 / vzext.vf8).
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1195
define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i16 indices into a half array: extend to XLEN-compatible offset width and
; double (vadd.vv) for the 2-byte element stride.
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1218
1219declare void @llvm.vp.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, <vscale x 1 x i1>, i32)
1220
define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; Single-float scatter through raw pointers, smallest (mf2) register group.
; RV32-LABEL: vpscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}
1236
1237declare void @llvm.vp.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, <vscale x 2 x i1>, i32)
1238
define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; nxv2 float scatter at e32/m1; pointers need m2 on riscv64 (v10).
; RV32-LABEL: vpscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}
1254
1255declare void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, <vscale x 4 x i1>, i32)
1256
define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; nxv4 float scatter at e32/m2; pointers at m2 (riscv32) / m4 (riscv64).
; RV32-LABEL: vpscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}
1272
define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 zeroext %evl) {
; All-true splat mask: the store should be emitted unmasked (no v0.t).
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}
1290
1291declare void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, <vscale x 8 x i1>, i32)
1292
define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; nxv8 float scatter at e32/m4; pointers at m4 (riscv32) / m8 (riscv64).
; RV32-LABEL: vpscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1308
define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i8 indices into a float array: extend then shift by 2 (4-byte stride).
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1331
define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Explicit sext i8 -> i32 before the GEP; codegen matches the plain i8 case.
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1355
define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extend counterpart of the sext test above (vzext.vf4 / vzext.vf8).
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1379
define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i16 indices into a float array: sign-extend (vf2 / vf4) then shift by 2.
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
1402
1403define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Same as the raw-i16 case but with an explicit sext-to-i32 in the IR;
; expected codegen is identical (vsext.vf2 on RV32 / vsext.vf4 on RV64).
1404; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
1405; RV32:       # %bb.0:
1406; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1407; RV32-NEXT:    vsext.vf2 v16, v12
1408; RV32-NEXT:    vsll.vi v12, v16, 2
1409; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1410; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1411; RV32-NEXT:    ret
1412;
1413; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
1414; RV64:       # %bb.0:
1415; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1416; RV64-NEXT:    vsext.vf4 v16, v12
1417; RV64-NEXT:    vsll.vi v16, v16, 2
1418; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1419; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1420; RV64-NEXT:    ret
1421  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1422  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
1423  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1424  ret void
1425}
1426
1427define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Zero-extended i16 offsets into an f32 scatter: vzext.vf2 (RV32) /
; vzext.vf4 (RV64), scale by 4, masked indexed store.
1428; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
1429; RV32:       # %bb.0:
1430; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1431; RV32-NEXT:    vzext.vf2 v16, v12
1432; RV32-NEXT:    vsll.vi v12, v16, 2
1433; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1434; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1435; RV32-NEXT:    ret
1436;
1437; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
1438; RV64:       # %bb.0:
1439; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1440; RV64-NEXT:    vzext.vf4 v16, v12
1441; RV64-NEXT:    vsll.vi v16, v16, 2
1442; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1443; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1444; RV64-NEXT:    ret
1445  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1446  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
1447  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1448  ret void
1449}
1450
1451define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Native-width (i32) GEP indices: RV32 only needs the shift; RV64 must
; sign-extend to i64 (vsext.vf2) before scaling for the 64-bit index form.
1452; RV32-LABEL: vpscatter_baseidx_nxv8f32:
1453; RV32:       # %bb.0:
1454; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1455; RV32-NEXT:    vsll.vi v12, v12, 2
1456; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1457; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1458; RV32-NEXT:    ret
1459;
1460; RV64-LABEL: vpscatter_baseidx_nxv8f32:
1461; RV64:       # %bb.0:
1462; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1463; RV64-NEXT:    vsext.vf2 v16, v12
1464; RV64-NEXT:    vsll.vi v16, v16, 2
1465; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1466; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1467; RV64-NEXT:    ret
1468  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
1469  call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1470  ret void
1471}
1472
1473declare void @llvm.vp.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, <vscale x 1 x i1>, i32)
1474
1475define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; Direct pointer-vector f64 scatter (mf-fractional/m1): indexed store off a
; zero base, EVL in a0, mask in v0.t; index EEW matches XLEN (ei32/ei64).
1476; RV32-LABEL: vpscatter_nxv1f64:
1477; RV32:       # %bb.0:
1478; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
1479; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1480; RV32-NEXT:    ret
1481;
1482; RV64-LABEL: vpscatter_nxv1f64:
1483; RV64:       # %bb.0:
1484; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
1485; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1486; RV64-NEXT:    ret
1487  call void @llvm.vp.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
1488  ret void
1489}
1490
1491declare void @llvm.vp.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, <vscale x 2 x i1>, i32)
1492
1493define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; Direct pointer-vector f64 scatter at LMUL=2; pointers arrive in v10.
1494; RV32-LABEL: vpscatter_nxv2f64:
1495; RV32:       # %bb.0:
1496; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
1497; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1498; RV32-NEXT:    ret
1499;
1500; RV64-LABEL: vpscatter_nxv2f64:
1501; RV64:       # %bb.0:
1502; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
1503; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1504; RV64-NEXT:    ret
1505  call void @llvm.vp.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
1506  ret void
1507}
1508
1509declare void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, <vscale x 4 x i1>, i32)
1510
1511define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; Direct pointer-vector f64 scatter at LMUL=4; pointers arrive in v12.
1512; RV32-LABEL: vpscatter_nxv4f64:
1513; RV32:       # %bb.0:
1514; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1515; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
1516; RV32-NEXT:    ret
1517;
1518; RV64-LABEL: vpscatter_nxv4f64:
1519; RV64:       # %bb.0:
1520; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1521; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1522; RV64-NEXT:    ret
1523  call void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
1524  ret void
1525}
1526
1527define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 zeroext %evl) {
; All-true mask (splat of i1 1): lowering drops the v0.t mask operand and
; emits the unmasked indexed store.
1528; RV32-LABEL: vpscatter_truemask_nxv4f64:
1529; RV32:       # %bb.0:
1530; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1531; RV32-NEXT:    vsoxei32.v v8, (zero), v12
1532; RV32-NEXT:    ret
1533;
1534; RV64-LABEL: vpscatter_truemask_nxv4f64:
1535; RV64:       # %bb.0:
1536; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1537; RV64-NEXT:    vsoxei64.v v8, (zero), v12
1538; RV64-NEXT:    ret
1539  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
1540  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1541  call void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
1542  ret void
1543}
1544
1545declare void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double>, <vscale x 6 x double*>, <vscale x 6 x i1>, i32)
1546
1547define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; Non-power-of-two element count (nxv6): widened to the containing m8
; register group; otherwise identical to the nxv8 direct scatter.
1548; RV32-LABEL: vpscatter_nxv6f64:
1549; RV32:       # %bb.0:
1550; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
1551; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
1552; RV32-NEXT:    ret
1553;
1554; RV64-LABEL: vpscatter_nxv6f64:
1555; RV64:       # %bb.0:
1556; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
1557; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
1558; RV64-NEXT:    ret
1559  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1560  ret void
1561}
1562
1563define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; nxv6 f64 scatter, raw i8 GEP indices: sign-extended to XLEN width
; (vsext.vf4 at e32 on RV32, vsext.vf8 at e64 on RV64), scaled by 8
; (vsll.vi ..., 3), then the masked indexed store at e64/m8.
1564; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
1565; RV32:       # %bb.0:
1566; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1567; RV32-NEXT:    vsext.vf4 v20, v16
1568; RV32-NEXT:    vsll.vi v16, v20, 3
1569; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1570; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1571; RV32-NEXT:    ret
1572;
1573; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
1574; RV64:       # %bb.0:
1575; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1576; RV64-NEXT:    vsext.vf8 v24, v16
1577; RV64-NEXT:    vsll.vi v16, v24, 3
1578; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1579; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1580; RV64-NEXT:    ret
1581  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i8> %idxs
1582  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1583  ret void
1584}
1585
1586define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i8 indices explicitly sext'd to i64 in IR: RV32 computes offsets at e64
; and then narrows them back to 32-bit indices (vncvt.x.x.w) so it can use
; vsoxei32; RV64 keeps the i64 offsets directly.
1587; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
1588; RV32:       # %bb.0:
1589; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1590; RV32-NEXT:    vsext.vf8 v24, v16
1591; RV32-NEXT:    vsll.vi v16, v24, 3
1592; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1593; RV32-NEXT:    vncvt.x.x.w v24, v16
1594; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1595; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1596; RV32-NEXT:    ret
1597;
1598; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
1599; RV64:       # %bb.0:
1600; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1601; RV64-NEXT:    vsext.vf8 v24, v16
1602; RV64-NEXT:    vsll.vi v16, v24, 3
1603; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1604; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1605; RV64-NEXT:    ret
1606  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
1607  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1608  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1609  ret void
1610}
1611
1612define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i8 indices zext'd to i64 in IR: same shape as the sext variant — RV32
; narrows the 64-bit offsets (vncvt.x.x.w) for vsoxei32, RV64 uses vsoxei64.
1613; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
1614; RV32:       # %bb.0:
1615; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1616; RV32-NEXT:    vzext.vf8 v24, v16
1617; RV32-NEXT:    vsll.vi v16, v24, 3
1618; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1619; RV32-NEXT:    vncvt.x.x.w v24, v16
1620; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1621; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1622; RV32-NEXT:    ret
1623;
1624; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
1625; RV64:       # %bb.0:
1626; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1627; RV64-NEXT:    vzext.vf8 v24, v16
1628; RV64-NEXT:    vsll.vi v16, v24, 3
1629; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1630; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1631; RV64-NEXT:    ret
1632  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
1633  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1634  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1635  ret void
1636}
1637
1638define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; Raw i16 GEP indices, f64 elements: vsext.vf2 (RV32) / vsext.vf4 (RV64),
; scale by 8, masked indexed store.
1639; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
1640; RV32:       # %bb.0:
1641; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1642; RV32-NEXT:    vsext.vf2 v20, v16
1643; RV32-NEXT:    vsll.vi v16, v20, 3
1644; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1645; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1646; RV32-NEXT:    ret
1647;
1648; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
1649; RV64:       # %bb.0:
1650; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1651; RV64-NEXT:    vsext.vf4 v24, v16
1652; RV64-NEXT:    vsll.vi v16, v24, 3
1653; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1654; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1655; RV64-NEXT:    ret
1656  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i16> %idxs
1657  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1658  ret void
1659}
1660
1661define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i16 indices sext'd to i64: RV32 computes 64-bit offsets then narrows with
; vncvt.x.x.w for vsoxei32; RV64 uses the 64-bit offsets directly.
1662; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
1663; RV32:       # %bb.0:
1664; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1665; RV32-NEXT:    vsext.vf4 v24, v16
1666; RV32-NEXT:    vsll.vi v16, v24, 3
1667; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1668; RV32-NEXT:    vncvt.x.x.w v24, v16
1669; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1670; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1671; RV32-NEXT:    ret
1672;
1673; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
1674; RV64:       # %bb.0:
1675; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1676; RV64-NEXT:    vsext.vf4 v24, v16
1677; RV64-NEXT:    vsll.vi v16, v24, 3
1678; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1679; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1680; RV64-NEXT:    ret
1681  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
1682  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1683  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1684  ret void
1685}
1686
1687define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i16 indices zext'd to i64: same narrowing dance on RV32 (vncvt.x.x.w to
; feed vsoxei32); RV64 scatters with 64-bit offsets.
1688; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
1689; RV32:       # %bb.0:
1690; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1691; RV32-NEXT:    vzext.vf4 v24, v16
1692; RV32-NEXT:    vsll.vi v16, v24, 3
1693; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1694; RV32-NEXT:    vncvt.x.x.w v24, v16
1695; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1696; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1697; RV32-NEXT:    ret
1698;
1699; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
1700; RV64:       # %bb.0:
1701; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1702; RV64-NEXT:    vzext.vf4 v24, v16
1703; RV64-NEXT:    vsll.vi v16, v24, 3
1704; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1705; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1706; RV64-NEXT:    ret
1707  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
1708  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1709  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1710  ret void
1711}
1712
1713define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; Raw i32 GEP indices: RV32 shifts in place (native index width); RV64 must
; sign-extend to i64 (vsext.vf2) before scaling.
1714; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
1715; RV32:       # %bb.0:
1716; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1717; RV32-NEXT:    vsll.vi v16, v16, 3
1718; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1719; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1720; RV32-NEXT:    ret
1721;
1722; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
1723; RV64:       # %bb.0:
1724; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1725; RV64-NEXT:    vsext.vf2 v24, v16
1726; RV64-NEXT:    vsll.vi v16, v24, 3
1727; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1728; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1729; RV64-NEXT:    ret
1730  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i32> %idxs
1731  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1732  ret void
1733}
1734
1735define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i32 indices sext'd to i64: RV32 widens (vsext.vf2), scales at e64, then
; narrows back (vncvt.x.x.w) for vsoxei32; RV64 stays at e64 throughout.
1736; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
1737; RV32:       # %bb.0:
1738; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1739; RV32-NEXT:    vsext.vf2 v24, v16
1740; RV32-NEXT:    vsll.vi v16, v24, 3
1741; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1742; RV32-NEXT:    vncvt.x.x.w v24, v16
1743; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1744; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1745; RV32-NEXT:    ret
1746;
1747; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
1748; RV64:       # %bb.0:
1749; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1750; RV64-NEXT:    vsext.vf2 v24, v16
1751; RV64-NEXT:    vsll.vi v16, v24, 3
1752; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1753; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1754; RV64-NEXT:    ret
1755  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
1756  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1757  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1758  ret void
1759}
1760
1761define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; i32 indices zext'd to i64: mirror of the sext case with vzext.vf2; RV32
; narrows the scaled offsets for vsoxei32.
1762; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
1763; RV32:       # %bb.0:
1764; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1765; RV32-NEXT:    vzext.vf2 v24, v16
1766; RV32-NEXT:    vsll.vi v16, v24, 3
1767; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1768; RV32-NEXT:    vncvt.x.x.w v24, v16
1769; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1770; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1771; RV32-NEXT:    ret
1772;
1773; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
1774; RV64:       # %bb.0:
1775; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1776; RV64-NEXT:    vzext.vf2 v24, v16
1777; RV64-NEXT:    vsll.vi v16, v24, 3
1778; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1779; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1780; RV64-NEXT:    ret
1781  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
1782  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
1783  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1784  ret void
1785}
1786
1787define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; Native i64 GEP indices: scaled at e64; RV32 then narrows the offsets
; (vncvt.x.x.w) to use the 32-bit indexed store form.
1788; RV32-LABEL: vpscatter_baseidx_nxv6f64:
1789; RV32:       # %bb.0:
1790; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1791; RV32-NEXT:    vsll.vi v16, v16, 3
1792; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1793; RV32-NEXT:    vncvt.x.x.w v24, v16
1794; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1795; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1796; RV32-NEXT:    ret
1797;
1798; RV64-LABEL: vpscatter_baseidx_nxv6f64:
1799; RV64:       # %bb.0:
1800; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1801; RV64-NEXT:    vsll.vi v16, v16, 3
1802; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1803; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1804; RV64-NEXT:    ret
1805  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %idxs
1806  call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
1807  ret void
1808}
1809
1810declare void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, <vscale x 8 x i1>, i32)
1811
1812define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Direct pointer-vector f64 scatter at full m8 register-group width.
1813; RV32-LABEL: vpscatter_nxv8f64:
1814; RV32:       # %bb.0:
1815; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
1816; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
1817; RV32-NEXT:    ret
1818;
1819; RV64-LABEL: vpscatter_nxv8f64:
1820; RV64:       # %bb.0:
1821; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
1822; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
1823; RV64-NEXT:    ret
1824  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1825  ret void
1826}
1827
1828define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; nxv8 f64 scatter, raw i8 GEP indices: vsext.vf4 (RV32) / vsext.vf8 (RV64),
; scale by 8, masked indexed store at e64/m8.
1829; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
1830; RV32:       # %bb.0:
1831; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1832; RV32-NEXT:    vsext.vf4 v20, v16
1833; RV32-NEXT:    vsll.vi v16, v20, 3
1834; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1835; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1836; RV32-NEXT:    ret
1837;
1838; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
1839; RV64:       # %bb.0:
1840; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1841; RV64-NEXT:    vsext.vf8 v24, v16
1842; RV64-NEXT:    vsll.vi v16, v24, 3
1843; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1844; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1845; RV64-NEXT:    ret
1846  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
1847  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1848  ret void
1849}
1850
1851define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i8 indices sext'd to i64: RV32 widens/scales at e64, narrows via
; vncvt.x.x.w for vsoxei32; RV64 scatters with the i64 offsets.
1852; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
1853; RV32:       # %bb.0:
1854; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1855; RV32-NEXT:    vsext.vf8 v24, v16
1856; RV32-NEXT:    vsll.vi v16, v24, 3
1857; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1858; RV32-NEXT:    vncvt.x.x.w v24, v16
1859; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1860; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1861; RV32-NEXT:    ret
1862;
1863; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
1864; RV64:       # %bb.0:
1865; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1866; RV64-NEXT:    vsext.vf8 v24, v16
1867; RV64-NEXT:    vsll.vi v16, v24, 3
1868; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1869; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1870; RV64-NEXT:    ret
1871  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1872  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
1873  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1874  ret void
1875}
1876
1877define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i8 indices zext'd to i64: vzext.vf8 widening; RV32 narrows the scaled
; offsets for vsoxei32, RV64 uses vsoxei64.
1878; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
1879; RV32:       # %bb.0:
1880; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1881; RV32-NEXT:    vzext.vf8 v24, v16
1882; RV32-NEXT:    vsll.vi v16, v24, 3
1883; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1884; RV32-NEXT:    vncvt.x.x.w v24, v16
1885; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1886; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1887; RV32-NEXT:    ret
1888;
1889; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
1890; RV64:       # %bb.0:
1891; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1892; RV64-NEXT:    vzext.vf8 v24, v16
1893; RV64-NEXT:    vsll.vi v16, v24, 3
1894; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1895; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1896; RV64-NEXT:    ret
1897  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1898  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
1899  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1900  ret void
1901}
1902
1903define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Raw i16 GEP indices, f64 elements: vsext.vf2 (RV32) / vsext.vf4 (RV64),
; scale by 8, masked indexed store.
1904; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
1905; RV32:       # %bb.0:
1906; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1907; RV32-NEXT:    vsext.vf2 v20, v16
1908; RV32-NEXT:    vsll.vi v16, v20, 3
1909; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1910; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1911; RV32-NEXT:    ret
1912;
1913; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
1914; RV64:       # %bb.0:
1915; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1916; RV64-NEXT:    vsext.vf4 v24, v16
1917; RV64-NEXT:    vsll.vi v16, v24, 3
1918; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1919; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1920; RV64-NEXT:    ret
1921  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
1922  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1923  ret void
1924}
1925
1926define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i16 indices sext'd to i64: RV32 narrows the 64-bit scaled offsets
; (vncvt.x.x.w) for vsoxei32; RV64 uses them directly.
1927; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
1928; RV32:       # %bb.0:
1929; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1930; RV32-NEXT:    vsext.vf4 v24, v16
1931; RV32-NEXT:    vsll.vi v16, v24, 3
1932; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1933; RV32-NEXT:    vncvt.x.x.w v24, v16
1934; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1935; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1936; RV32-NEXT:    ret
1937;
1938; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
1939; RV64:       # %bb.0:
1940; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1941; RV64-NEXT:    vsext.vf4 v24, v16
1942; RV64-NEXT:    vsll.vi v16, v24, 3
1943; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1944; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1945; RV64-NEXT:    ret
1946  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1947  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
1948  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1949  ret void
1950}
1951
1952define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i16 indices zext'd to i64: vzext.vf4 widening; RV32 narrows for
; vsoxei32, RV64 keeps i64 offsets.
1953; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
1954; RV32:       # %bb.0:
1955; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1956; RV32-NEXT:    vzext.vf4 v24, v16
1957; RV32-NEXT:    vsll.vi v16, v24, 3
1958; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1959; RV32-NEXT:    vncvt.x.x.w v24, v16
1960; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1961; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
1962; RV32-NEXT:    ret
1963;
1964; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
1965; RV64:       # %bb.0:
1966; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1967; RV64-NEXT:    vzext.vf4 v24, v16
1968; RV64-NEXT:    vsll.vi v16, v24, 3
1969; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1970; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1971; RV64-NEXT:    ret
1972  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1973  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
1974  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1975  ret void
1976}
1977
1978define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; Raw i32 GEP indices: RV32 shifts in place; RV64 sign-extends to i64
; (vsext.vf2) before scaling by 8.
1979; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
1980; RV32:       # %bb.0:
1981; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1982; RV32-NEXT:    vsll.vi v16, v16, 3
1983; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1984; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
1985; RV32-NEXT:    ret
1986;
1987; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
1988; RV64:       # %bb.0:
1989; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1990; RV64-NEXT:    vsext.vf2 v24, v16
1991; RV64-NEXT:    vsll.vi v16, v24, 3
1992; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1993; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1994; RV64-NEXT:    ret
1995  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
1996  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1997  ret void
1998}
1999
2000define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i32 indices sext'd to i64: RV32 widens, scales at e64, then narrows back
; (vncvt.x.x.w) for vsoxei32; RV64 stays at e64.
2001; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
2002; RV32:       # %bb.0:
2003; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2004; RV32-NEXT:    vsext.vf2 v24, v16
2005; RV32-NEXT:    vsll.vi v16, v24, 3
2006; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
2007; RV32-NEXT:    vncvt.x.x.w v24, v16
2008; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2009; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2010; RV32-NEXT:    ret
2011;
2012; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
2013; RV64:       # %bb.0:
2014; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2015; RV64-NEXT:    vsext.vf2 v24, v16
2016; RV64-NEXT:    vsll.vi v16, v24, 3
2017; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
2018; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
2019; RV64-NEXT:    ret
2020  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
2021  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
2022  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
2023  ret void
2024}
2025
2026define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; i32 indices zext'd to i64: mirror of the sext case with vzext.vf2.
2027; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
2028; RV32:       # %bb.0:
2029; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2030; RV32-NEXT:    vzext.vf2 v24, v16
2031; RV32-NEXT:    vsll.vi v16, v24, 3
2032; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
2033; RV32-NEXT:    vncvt.x.x.w v24, v16
2034; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2035; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2036; RV32-NEXT:    ret
2037;
2038; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
2039; RV64:       # %bb.0:
2040; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2041; RV64-NEXT:    vzext.vf2 v24, v16
2042; RV64-NEXT:    vsll.vi v16, v24, 3
2043; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
2044; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
2045; RV64-NEXT:    ret
2046  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
2047  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
2048  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
2049  ret void
2050}
2051
; Scatter of 8 doubles with native i64 indices: no extension is needed, only
; the scale by the 8-byte element size (vsll.vi ..., 3).  RV32 truncates the
; scaled offsets to 32 bits with vncvt.x.x.w before vsoxei32; RV64 scatters
; the 64-bit offsets directly with vsoxei64.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand.
2052define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2053; RV32-LABEL: vpscatter_baseidx_nxv8f64:
2054; RV32:       # %bb.0:
2055; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2056; RV32-NEXT:    vsll.vi v16, v16, 3
2057; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
2058; RV32-NEXT:    vncvt.x.x.w v24, v16
2059; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2060; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2061; RV32-NEXT:    ret
2062;
2063; RV64-LABEL: vpscatter_baseidx_nxv8f64:
2064; RV64:       # %bb.0:
2065; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2066; RV64-NEXT:    vsll.vi v16, v16, 3
2067; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
2068; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
2069; RV64-NEXT:    ret
2070  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
2071  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
2072  call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
2073}
2074
2075declare void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, <vscale x 16 x i1>, i32)
2076
; A <vscale x 16 x double> value does not fit in one LMUL=8 register group,
; so the scatter is split in two halves (.LBB95_*).  The EVL is clamped
; against vlenb (which equals VLMAX for e64/m8) for the first store, the
; mask in v0 is slid down by vlenb/8 mask bits, and the remaining elements
; are stored with the leftover EVL.  On RV32 the 16 x i32 pointers fit in
; one m8 group (vl8re32.v); on RV64 the 16 x i64 pointers need two m8 loads,
; forcing a stack spill/reload of the v16 value group around the first store.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand.
2077define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2078; RV32-LABEL: vpscatter_nxv16f64:
2079; RV32:       # %bb.0:
2080; RV32-NEXT:    vl8re32.v v24, (a0)
2081; RV32-NEXT:    csrr a0, vlenb
2082; RV32-NEXT:    mv a2, a1
2083; RV32-NEXT:    bltu a1, a0, .LBB95_2
2084; RV32-NEXT:  # %bb.1:
2085; RV32-NEXT:    mv a2, a0
2086; RV32-NEXT:  .LBB95_2:
2087; RV32-NEXT:    li a3, 0
2088; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
2089; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
2090; RV32-NEXT:    srli a2, a0, 3
2091; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
2092; RV32-NEXT:    sub a0, a1, a0
2093; RV32-NEXT:    vslidedown.vx v0, v0, a2
2094; RV32-NEXT:    bltu a1, a0, .LBB95_4
2095; RV32-NEXT:  # %bb.3:
2096; RV32-NEXT:    mv a3, a0
2097; RV32-NEXT:  .LBB95_4:
2098; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2099; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
2100; RV32-NEXT:    ret
2101;
2102; RV64-LABEL: vpscatter_nxv16f64:
2103; RV64:       # %bb.0:
2104; RV64-NEXT:    addi sp, sp, -16
2105; RV64-NEXT:    .cfi_def_cfa_offset 16
2106; RV64-NEXT:    csrr a1, vlenb
2107; RV64-NEXT:    slli a1, a1, 3
2108; RV64-NEXT:    sub sp, sp, a1
2109; RV64-NEXT:    addi a1, sp, 16
2110; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2111; RV64-NEXT:    vl8re64.v v16, (a0)
2112; RV64-NEXT:    csrr a1, vlenb
2113; RV64-NEXT:    slli a3, a1, 3
2114; RV64-NEXT:    add a0, a0, a3
2115; RV64-NEXT:    mv a3, a2
2116; RV64-NEXT:    bltu a2, a1, .LBB95_2
2117; RV64-NEXT:  # %bb.1:
2118; RV64-NEXT:    mv a3, a1
2119; RV64-NEXT:  .LBB95_2:
2120; RV64-NEXT:    li a4, 0
2121; RV64-NEXT:    vl8re64.v v24, (a0)
2122; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2123; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
2124; RV64-NEXT:    srli a3, a1, 3
2125; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
2126; RV64-NEXT:    sub a0, a2, a1
2127; RV64-NEXT:    vslidedown.vx v0, v0, a3
2128; RV64-NEXT:    bltu a2, a0, .LBB95_4
2129; RV64-NEXT:  # %bb.3:
2130; RV64-NEXT:    mv a4, a0
2131; RV64-NEXT:  .LBB95_4:
2132; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
2133; RV64-NEXT:    addi a0, sp, 16
2134; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
2135; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
2136; RV64-NEXT:    csrr a0, vlenb
2137; RV64-NEXT:    slli a0, a0, 3
2138; RV64-NEXT:    add sp, sp, a0
2139; RV64-NEXT:    addi sp, sp, 16
2140; RV64-NEXT:    ret
2141  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2142  ret void
2143}
2144
; 16-element scatter with raw i16 indices, split into two halves because the
; double value vector needs two e64/m8 groups.  RV32 can widen all 16
; indices at once (vsext.vf2 to i32 in one e32/m8 group, then vsll.vi 3) and
; reuses the low (v24) and high (v28) quarters for the two vsoxei32 stores;
; RV64 must widen each half separately with vsext.vf4 to i64 (from v4 for
; the first store, from v6 for the second).  EVL clamping against vlenb and
; the mask vslidedown follow the usual split-scatter scheme.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand.
2145define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2146; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
2147; RV32:       # %bb.0:
2148; RV32-NEXT:    vl4re16.v v4, (a1)
2149; RV32-NEXT:    csrr a1, vlenb
2150; RV32-NEXT:    mv a3, a2
2151; RV32-NEXT:    bltu a2, a1, .LBB96_2
2152; RV32-NEXT:  # %bb.1:
2153; RV32-NEXT:    mv a3, a1
2154; RV32-NEXT:  .LBB96_2:
2155; RV32-NEXT:    li a4, 0
2156; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, mu
2157; RV32-NEXT:    vsext.vf2 v24, v4
2158; RV32-NEXT:    vsll.vi v24, v24, 3
2159; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2160; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2161; RV32-NEXT:    srli a3, a1, 3
2162; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2163; RV32-NEXT:    sub a1, a2, a1
2164; RV32-NEXT:    vslidedown.vx v0, v0, a3
2165; RV32-NEXT:    bltu a2, a1, .LBB96_4
2166; RV32-NEXT:  # %bb.3:
2167; RV32-NEXT:    mv a4, a1
2168; RV32-NEXT:  .LBB96_4:
2169; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
2170; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
2171; RV32-NEXT:    ret
2172;
2173; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
2174; RV64:       # %bb.0:
2175; RV64-NEXT:    vl4re16.v v4, (a1)
2176; RV64-NEXT:    csrr a1, vlenb
2177; RV64-NEXT:    mv a3, a2
2178; RV64-NEXT:    bltu a2, a1, .LBB96_2
2179; RV64-NEXT:  # %bb.1:
2180; RV64-NEXT:    mv a3, a1
2181; RV64-NEXT:  .LBB96_2:
2182; RV64-NEXT:    li a4, 0
2183; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
2184; RV64-NEXT:    vsext.vf4 v24, v4
2185; RV64-NEXT:    vsll.vi v24, v24, 3
2186; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2187; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
2188; RV64-NEXT:    srli a3, a1, 3
2189; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2190; RV64-NEXT:    sub a1, a2, a1
2191; RV64-NEXT:    vslidedown.vx v0, v0, a3
2192; RV64-NEXT:    bltu a2, a1, .LBB96_4
2193; RV64-NEXT:  # %bb.3:
2194; RV64-NEXT:    mv a4, a1
2195; RV64-NEXT:  .LBB96_4:
2196; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2197; RV64-NEXT:    vsext.vf4 v8, v6
2198; RV64-NEXT:    vsll.vi v8, v8, 3
2199; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
2200; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
2201; RV64-NEXT:    ret
2202  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
2203  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2204  ret void
2205}
2206
; The i16 indices are sign-extended to i64 in IR before the GEP, so both
; targets materialize <vscale x 16 x i64> offsets via two vsext.vf4 m8
; groups (from v24 and from v26); RV32 then narrows each scaled half back to
; 32-bit offsets with vncvt.x.x.w for vsoxei32.  Register pressure from the
; two wide index groups forces both value groups (v8 and v16) to be spilled
; to the stack up front and reloaded just before their vsoxei store.  EVL is
; clamped against vlenb and the mask slid down for the second half, as in
; the other split nxv16 scatters.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand.
2207define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2208; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
2209; RV32:       # %bb.0:
2210; RV32-NEXT:    addi sp, sp, -16
2211; RV32-NEXT:    .cfi_def_cfa_offset 16
2212; RV32-NEXT:    csrr a3, vlenb
2213; RV32-NEXT:    slli a3, a3, 4
2214; RV32-NEXT:    sub sp, sp, a3
2215; RV32-NEXT:    vl4re16.v v24, (a1)
2216; RV32-NEXT:    csrr a1, vlenb
2217; RV32-NEXT:    slli a1, a1, 3
2218; RV32-NEXT:    add a1, sp, a1
2219; RV32-NEXT:    addi a1, a1, 16
2220; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2221; RV32-NEXT:    addi a1, sp, 16
2222; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2223; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2224; RV32-NEXT:    csrr a1, vlenb
2225; RV32-NEXT:    vsext.vf4 v8, v24
2226; RV32-NEXT:    mv a3, a2
2227; RV32-NEXT:    bltu a2, a1, .LBB97_2
2228; RV32-NEXT:  # %bb.1:
2229; RV32-NEXT:    mv a3, a1
2230; RV32-NEXT:  .LBB97_2:
2231; RV32-NEXT:    li a4, 0
2232; RV32-NEXT:    vsext.vf4 v16, v26
2233; RV32-NEXT:    vsll.vi v8, v8, 3
2234; RV32-NEXT:    vsetvli zero, a3, e32, m4, ta, mu
2235; RV32-NEXT:    vncvt.x.x.w v24, v8
2236; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2237; RV32-NEXT:    addi a3, sp, 16
2238; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
2239; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2240; RV32-NEXT:    srli a3, a1, 3
2241; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2242; RV32-NEXT:    sub a1, a2, a1
2243; RV32-NEXT:    vslidedown.vx v0, v0, a3
2244; RV32-NEXT:    bltu a2, a1, .LBB97_4
2245; RV32-NEXT:  # %bb.3:
2246; RV32-NEXT:    mv a4, a1
2247; RV32-NEXT:  .LBB97_4:
2248; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2249; RV32-NEXT:    vsll.vi v8, v16, 3
2250; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
2251; RV32-NEXT:    vncvt.x.x.w v16, v8
2252; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2253; RV32-NEXT:    csrr a1, vlenb
2254; RV32-NEXT:    slli a1, a1, 3
2255; RV32-NEXT:    add a1, sp, a1
2256; RV32-NEXT:    addi a1, a1, 16
2257; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
2258; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
2259; RV32-NEXT:    csrr a0, vlenb
2260; RV32-NEXT:    slli a0, a0, 4
2261; RV32-NEXT:    add sp, sp, a0
2262; RV32-NEXT:    addi sp, sp, 16
2263; RV32-NEXT:    ret
2264;
2265; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
2266; RV64:       # %bb.0:
2267; RV64-NEXT:    addi sp, sp, -16
2268; RV64-NEXT:    .cfi_def_cfa_offset 16
2269; RV64-NEXT:    csrr a3, vlenb
2270; RV64-NEXT:    slli a3, a3, 4
2271; RV64-NEXT:    sub sp, sp, a3
2272; RV64-NEXT:    vl4re16.v v24, (a1)
2273; RV64-NEXT:    csrr a1, vlenb
2274; RV64-NEXT:    slli a1, a1, 3
2275; RV64-NEXT:    add a1, sp, a1
2276; RV64-NEXT:    addi a1, a1, 16
2277; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2278; RV64-NEXT:    addi a1, sp, 16
2279; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2280; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2281; RV64-NEXT:    csrr a1, vlenb
2282; RV64-NEXT:    vsext.vf4 v8, v24
2283; RV64-NEXT:    mv a3, a2
2284; RV64-NEXT:    bltu a2, a1, .LBB97_2
2285; RV64-NEXT:  # %bb.1:
2286; RV64-NEXT:    mv a3, a1
2287; RV64-NEXT:  .LBB97_2:
2288; RV64-NEXT:    li a4, 0
2289; RV64-NEXT:    vsext.vf4 v16, v26
2290; RV64-NEXT:    vsll.vi v8, v8, 3
2291; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2292; RV64-NEXT:    addi a3, sp, 16
2293; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
2294; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
2295; RV64-NEXT:    srli a3, a1, 3
2296; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2297; RV64-NEXT:    sub a1, a2, a1
2298; RV64-NEXT:    vslidedown.vx v0, v0, a3
2299; RV64-NEXT:    bltu a2, a1, .LBB97_4
2300; RV64-NEXT:  # %bb.3:
2301; RV64-NEXT:    mv a4, a1
2302; RV64-NEXT:  .LBB97_4:
2303; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2304; RV64-NEXT:    vsll.vi v8, v16, 3
2305; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
2306; RV64-NEXT:    csrr a1, vlenb
2307; RV64-NEXT:    slli a1, a1, 3
2308; RV64-NEXT:    add a1, sp, a1
2309; RV64-NEXT:    addi a1, a1, 16
2310; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
2311; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
2312; RV64-NEXT:    csrr a0, vlenb
2313; RV64-NEXT:    slli a0, a0, 4
2314; RV64-NEXT:    add sp, sp, a0
2315; RV64-NEXT:    addi sp, sp, 16
2316; RV64-NEXT:    ret
2317  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
2318  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
2319  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2320  ret void
2321}
2322
; Zero-extended variant of the i16-index nxv16 scatter: the i16 indices are
; zero-extended to i64 in IR, so both targets build <vscale x 16 x i64>
; offsets with two vzext.vf4 m8 groups (from v24 and from v26) scaled by 8
; (vsll.vi ..., 3); RV32 narrows each scaled half to 32-bit offsets with
; vncvt.x.x.w for vsoxei32.  As in the sign-extended case, both value groups
; (v8 and v16) are spilled to the stack and reloaded just before their
; vsoxei store, and the EVL/mask are split against vlenb for the two halves.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand.
2323define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2324; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
2325; RV32:       # %bb.0:
2326; RV32-NEXT:    addi sp, sp, -16
2327; RV32-NEXT:    .cfi_def_cfa_offset 16
2328; RV32-NEXT:    csrr a3, vlenb
2329; RV32-NEXT:    slli a3, a3, 4
2330; RV32-NEXT:    sub sp, sp, a3
2331; RV32-NEXT:    vl4re16.v v24, (a1)
2332; RV32-NEXT:    csrr a1, vlenb
2333; RV32-NEXT:    slli a1, a1, 3
2334; RV32-NEXT:    add a1, sp, a1
2335; RV32-NEXT:    addi a1, a1, 16
2336; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2337; RV32-NEXT:    addi a1, sp, 16
2338; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2339; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2340; RV32-NEXT:    csrr a1, vlenb
2341; RV32-NEXT:    vzext.vf4 v8, v24
2342; RV32-NEXT:    mv a3, a2
2343; RV32-NEXT:    bltu a2, a1, .LBB98_2
2344; RV32-NEXT:  # %bb.1:
2345; RV32-NEXT:    mv a3, a1
2346; RV32-NEXT:  .LBB98_2:
2347; RV32-NEXT:    li a4, 0
2348; RV32-NEXT:    vzext.vf4 v16, v26
2349; RV32-NEXT:    vsll.vi v8, v8, 3
2350; RV32-NEXT:    vsetvli zero, a3, e32, m4, ta, mu
2351; RV32-NEXT:    vncvt.x.x.w v24, v8
2352; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2353; RV32-NEXT:    addi a3, sp, 16
2354; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
2355; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2356; RV32-NEXT:    srli a3, a1, 3
2357; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2358; RV32-NEXT:    sub a1, a2, a1
2359; RV32-NEXT:    vslidedown.vx v0, v0, a3
2360; RV32-NEXT:    bltu a2, a1, .LBB98_4
2361; RV32-NEXT:  # %bb.3:
2362; RV32-NEXT:    mv a4, a1
2363; RV32-NEXT:  .LBB98_4:
2364; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2365; RV32-NEXT:    vsll.vi v8, v16, 3
2366; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
2367; RV32-NEXT:    vncvt.x.x.w v16, v8
2368; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2369; RV32-NEXT:    csrr a1, vlenb
2370; RV32-NEXT:    slli a1, a1, 3
2371; RV32-NEXT:    add a1, sp, a1
2372; RV32-NEXT:    addi a1, a1, 16
2373; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
2374; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
2375; RV32-NEXT:    csrr a0, vlenb
2376; RV32-NEXT:    slli a0, a0, 4
2377; RV32-NEXT:    add sp, sp, a0
2378; RV32-NEXT:    addi sp, sp, 16
2379; RV32-NEXT:    ret
2380;
2381; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
2382; RV64:       # %bb.0:
2383; RV64-NEXT:    addi sp, sp, -16
2384; RV64-NEXT:    .cfi_def_cfa_offset 16
2385; RV64-NEXT:    csrr a3, vlenb
2386; RV64-NEXT:    slli a3, a3, 4
2387; RV64-NEXT:    sub sp, sp, a3
2388; RV64-NEXT:    vl4re16.v v24, (a1)
2389; RV64-NEXT:    csrr a1, vlenb
2390; RV64-NEXT:    slli a1, a1, 3
2391; RV64-NEXT:    add a1, sp, a1
2392; RV64-NEXT:    addi a1, a1, 16
2393; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
2394; RV64-NEXT:    addi a1, sp, 16
2395; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
2396; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2397; RV64-NEXT:    csrr a1, vlenb
2398; RV64-NEXT:    vzext.vf4 v8, v24
2399; RV64-NEXT:    mv a3, a2
2400; RV64-NEXT:    bltu a2, a1, .LBB98_2
2401; RV64-NEXT:  # %bb.1:
2402; RV64-NEXT:    mv a3, a1
2403; RV64-NEXT:  .LBB98_2:
2404; RV64-NEXT:    li a4, 0
2405; RV64-NEXT:    vzext.vf4 v16, v26
2406; RV64-NEXT:    vsll.vi v8, v8, 3
2407; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2408; RV64-NEXT:    addi a3, sp, 16
2409; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
2410; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
2411; RV64-NEXT:    srli a3, a1, 3
2412; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
2413; RV64-NEXT:    sub a1, a2, a1
2414; RV64-NEXT:    vslidedown.vx v0, v0, a3
2415; RV64-NEXT:    bltu a2, a1, .LBB98_4
2416; RV64-NEXT:  # %bb.3:
2417; RV64-NEXT:    mv a4, a1
2418; RV64-NEXT:  .LBB98_4:
2419; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2420; RV64-NEXT:    vsll.vi v8, v16, 3
2421; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
2422; RV64-NEXT:    csrr a1, vlenb
2423; RV64-NEXT:    slli a1, a1, 3
2424; RV64-NEXT:    add a1, sp, a1
2425; RV64-NEXT:    addi a1, a1, 16
2426; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
2427; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
2428; RV64-NEXT:    csrr a0, vlenb
2429; RV64-NEXT:    slli a0, a0, 4
2430; RV64-NEXT:    add sp, sp, a0
2431; RV64-NEXT:    addi sp, sp, 16
2432; RV64-NEXT:    ret
2433  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
2434  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
2435  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2436  ret void
2437}
2438