; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64

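; A masked scatter is lowered to an indexed store: vsoxei32 when pointers are
; 32 bits wide (RV32) and vsoxei64 when they are 64 bits wide (RV64).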
declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

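; Truncating scatters fold the trunc into the store: each halving of the
; element width becomes a vnsrl.wi by 0 ahead of the indexed store.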
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

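; An all-ones mask becomes an unmasked vsoxei; an all-zeros mask folds the
; scatter away entirely.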
define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

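; When the pointers come from a GEP with a vector index, the index is
; extended to pointer width (vsext.vf4/vf8 here) before the indexed store.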
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

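; i16 element tests; the structure mirrors the i8 tests above.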
declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

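; For i16 elements the byte offset is index*2, materialized as a vadd.vv of
; the extended index with itself rather than a shift.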
define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

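; i32 element tests; the same lowering pattern at SEW=32.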
declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

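; i32 indices are scaled by 4 with a vsll.vi by 2. On RV32 an i32 index
; vector can feed vsoxei32 directly; RV64 still widens to i64 first.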
define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

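; i64 element tests. On RV32 pointers are only 32 bits wide, so vsoxei32 is
; used even for 64-bit data.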
declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

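; i64 index arithmetic: offsets are index*8 (vsll.vi by 3). On RV32, indices
; that were already extended to i64 are narrowed back to 32 bits with
; vnsrl.wi so the store can use vsoxei32.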
define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

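; Floating-point scatters reuse the integer lowering for the matching
; element width; f16 tests follow.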
declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

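; f16 indexed forms: the same index extension and *2 scaling as the i16
; tests.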
define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

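; f32 tests mirror the i32 lowering.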
1250declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)
1251
1252define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) {
1253; RV32-LABEL: mscatter_nxv1f32:
1254; RV32:       # %bb.0:
1255; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
1256; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1257; RV32-NEXT:    ret
1258;
1259; RV64-LABEL: mscatter_nxv1f32:
1260; RV64:       # %bb.0:
1261; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
1262; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1263; RV64-NEXT:    ret
1264  call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m)
1265  ret void
1266}
1267
1268declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
1269
1270define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) {
1271; RV32-LABEL: mscatter_nxv2f32:
1272; RV32:       # %bb.0:
1273; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
1274; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1275; RV32-NEXT:    ret
1276;
1277; RV64-LABEL: mscatter_nxv2f32:
1278; RV64:       # %bb.0:
1279; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
1280; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1281; RV64-NEXT:    ret
1282  call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m)
1283  ret void
1284}
1285
1286declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
1287
1288define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) {
1289; RV32-LABEL: mscatter_nxv4f32:
1290; RV32:       # %bb.0:
1291; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
1292; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1293; RV32-NEXT:    ret
1294;
1295; RV64-LABEL: mscatter_nxv4f32:
1296; RV64:       # %bb.0:
1297; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
1298; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1299; RV64-NEXT:    ret
1300  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m)
1301  ret void
1302}
1303
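; With an all-true mask, the scatter lowers to an unmasked indexed store
; (no v0.t operand appears below).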
define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

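; With an all-false mask, the scatter is a no-op and folds away entirely.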
define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)

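; nxv16f64 is not a legal type, so the scatter is split into two nxv8f64
; operations; the upper half of the mask is extracted with vslidedown.vx.
; On RV64 the second value operand is spilled and reloaded around the first
; store to free an m8 register group for the pointer vectors.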
define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v24, (a0)
; RV32-NEXT:    vl4re32.v v28, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

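; The indexed nxv16 cases split the index vector too: the second store uses
; the upper half of the widened index register group on RV32, while RV64
; re-extends the upper half of the source index operand.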
define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}
