; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

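; Masked scatters lower to indexed vector stores: vsoxei32 when pointers are
; 32 bits wide (RV32) and vsoxei64 when they are 64 bits wide (RV64).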
declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

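; Truncating scatters narrow the value one power of two at a time with
; vncvt.x.x.w before the indexed store.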
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

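; An all-ones mask folds away the mask operand, leaving an unmasked vsoxei.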
define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}

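; An all-zeros mask stores nothing, so the whole scatter folds to a no-op.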
define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

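; Base+index scatters extend the index vector to pointer width (vsext.vf4 on
; RV32, vsext.vf8 on RV64 for i8 indices); byte-sized elements need no scaling.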
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

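; For i16 elements the extended indices are scaled by 2, emitted as a vadd.vv
; of the index vector with itself rather than a shift.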
define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

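; For i32 elements the extended indices are scaled by 4 using vsll.vi by 2.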
define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

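; For i64 elements the indices are scaled by 8 using vsll.vi by 3. On RV32,
; indices already extended to i64 by the IR are narrowed back to 32 bits with
; vncvt.x.x.w so the 32-bit indexed store can still be used.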
define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

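; Floating-point scatters use the same lowering as the integer cases above.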
1038declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)
1039
1040define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) {
1041; RV32-LABEL: mscatter_nxv1f16:
1042; RV32:       # %bb.0:
1043; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1044; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1045; RV32-NEXT:    ret
1046;
1047; RV64-LABEL: mscatter_nxv1f16:
1048; RV64:       # %bb.0:
1049; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1050; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1051; RV64-NEXT:    ret
1052  call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m)
1053  ret void
1054}
1055
1056declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
1057
1058define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) {
1059; RV32-LABEL: mscatter_nxv2f16:
1060; RV32:       # %bb.0:
1061; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1062; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1063; RV32-NEXT:    ret
1064;
1065; RV64-LABEL: mscatter_nxv2f16:
1066; RV64:       # %bb.0:
1067; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1068; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1069; RV64-NEXT:    ret
1070  call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m)
1071  ret void
1072}
1073
1074declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
1075
1076define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) {
1077; RV32-LABEL: mscatter_nxv4f16:
1078; RV32:       # %bb.0:
1079; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1080; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1081; RV32-NEXT:    ret
1082;
1083; RV64-LABEL: mscatter_nxv4f16:
1084; RV64:       # %bb.0:
1085; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1086; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1087; RV64-NEXT:    ret
1088  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m)
1089  ret void
1090}
1091
1092define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
1093; RV32-LABEL: mscatter_truemask_nxv4f16:
1094; RV32:       # %bb.0:
1095; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1096; RV32-NEXT:    vsoxei32.v v8, (zero), v10
1097; RV32-NEXT:    ret
1098;
1099; RV64-LABEL: mscatter_truemask_nxv4f16:
1100; RV64:       # %bb.0:
1101; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1102; RV64-NEXT:    vsoxei64.v v8, (zero), v12
1103; RV64-NEXT:    ret
1104  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
1105  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1106  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
1107  ret void
1108}
1109
1110define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
1111; CHECK-LABEL: mscatter_falsemask_nxv4f16:
1112; CHECK:       # %bb.0:
1113; CHECK-NEXT:    ret
1114  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
1115  ret void
1116}
1117
1118declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)
1119
1120define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) {
1121; RV32-LABEL: mscatter_nxv8f16:
1122; RV32:       # %bb.0:
1123; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1124; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
1125; RV32-NEXT:    ret
1126;
1127; RV64-LABEL: mscatter_nxv8f16:
1128; RV64:       # %bb.0:
1129; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1130; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
1131; RV64-NEXT:    ret
1132  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
1133  ret void
1134}
1135
1136define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1137; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
1138; RV32:       # %bb.0:
1139; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1140; RV32-NEXT:    vsext.vf4 v12, v10
1141; RV32-NEXT:    vadd.vv v12, v12, v12
1142; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1143; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1144; RV32-NEXT:    ret
1145;
1146; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
1147; RV64:       # %bb.0:
1148; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1149; RV64-NEXT:    vsext.vf8 v16, v10
1150; RV64-NEXT:    vadd.vv v16, v16, v16
1151; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1152; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1153; RV64-NEXT:    ret
1154  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
1155  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
1156  ret void
1157}
1158
1159define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1160; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
1161; RV32:       # %bb.0:
1162; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1163; RV32-NEXT:    vsext.vf4 v12, v10
1164; RV32-NEXT:    vadd.vv v12, v12, v12
1165; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1166; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1167; RV32-NEXT:    ret
1168;
1169; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
1170; RV64:       # %bb.0:
1171; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1172; RV64-NEXT:    vsext.vf8 v16, v10
1173; RV64-NEXT:    vadd.vv v16, v16, v16
1174; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1175; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1176; RV64-NEXT:    ret
1177  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1178  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
1179  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
1180  ret void
1181}
1182
1183define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
1184; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
1185; RV32:       # %bb.0:
1186; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1187; RV32-NEXT:    vzext.vf4 v12, v10
1188; RV32-NEXT:    vadd.vv v12, v12, v12
1189; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1190; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1191; RV32-NEXT:    ret
1192;
1193; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
1194; RV64:       # %bb.0:
1195; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1196; RV64-NEXT:    vzext.vf8 v16, v10
1197; RV64-NEXT:    vadd.vv v16, v16, v16
1198; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1199; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1200; RV64-NEXT:    ret
1201  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1202  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
1203  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
1204  ret void
1205}
1206
1207define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
1208; RV32-LABEL: mscatter_baseidx_nxv8f16:
1209; RV32:       # %bb.0:
1210; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1211; RV32-NEXT:    vsext.vf2 v12, v10
1212; RV32-NEXT:    vadd.vv v12, v12, v12
1213; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1214; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1215; RV32-NEXT:    ret
1216;
1217; RV64-LABEL: mscatter_baseidx_nxv8f16:
1218; RV64:       # %bb.0:
1219; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1220; RV64-NEXT:    vsext.vf4 v16, v10
1221; RV64-NEXT:    vadd.vv v16, v16, v16
1222; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1223; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1224; RV64-NEXT:    ret
1225  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
1226  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
1227  ret void
1228}
1229
declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

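; An all-true mask is expected to fold away, leaving an unmasked vsoxei
; (no v0.t operand).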
define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

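; An all-false mask should fold the scatter away entirely, so both targets
; share one CHECK body.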
define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

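; For 8-bit indexes into f32, RV32 appears to sign-extend to e32 (vsext.vf4)
; and keep 32-bit offsets, while RV64 extends all the way to e64 (vsext.vf8)
; and scatters with vsoxei64.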
define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

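; f64 scatters: RV32 can still address with 32-bit offsets (vsoxei32), so
; the baseidx variants below toggle vsetvli between the index EEW and e64
; for the store itself.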
declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

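; When the index is explicitly extended to i64 in the IR, RV32 appears to
; compute the offsets at e64 and then narrow them with vncvt.x.x.w so a
; 32-bit-indexed vsoxei32 can still be used.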
define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

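; Native i64 indexes on RV32: the shift happens at e64 and the result is
; truncated to e32 for the index operand of vsoxei32.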
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

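; nxv16f64 exceeds the largest register group (LMUL=8), so the scatter is
; split into two m8 halves. The mask for the second half is produced by
; sliding v0 down by vlenb/8 bytes (i.e. 8*vscale mask bits); on RV64 the
; second value operand also has to be spilled to the stack to free an m8
; register group for the pointer vectors.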
declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v24, (a0)
; RV32-NEXT:    vl4re32.v v28, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

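; With a 16-element index vector the indexes arrive in memory (vl2r.v). RV32
; appears to extend the whole vector at e32/m8 in one go, while RV64 extends
; and scales each half (v2 and v3) separately at e64.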
define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

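; Same split as above but with i16 indexes: the vector is loaded with
; vl4re16.v and extended with vsext.vf2 (RV32) or vsext.vf4 (RV64).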
define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}
