; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

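; Test gathers whose result is immediately sign- or zero-extended: the load is
; performed at the narrow element type and the extend is emitted separately.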
define <vscale x 2 x i16> @vpgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @vpgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

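; With an all-true mask the gather lowers to an unmasked indexed load.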
define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x i8*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

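; Test gathers addressed as a scalar base plus a vector of indices. The indices
; are extended to pointer width and scaled by the element size before the
; indexed load.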
define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, <vscale x 32 x i1>, i32)

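; Extended to pointer width, an nxv32i8 index vector no longer fits in a single
; register group, so the gather is split: two halves on RV32, four quarters on
; RV64.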
define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    srli a5, a2, 2
; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, mu
; RV32-NEXT:    slli a2, a2, 1
; RV32-NEXT:    sub a4, a1, a2
; RV32-NEXT:    vslidedown.vx v0, v0, a5
; RV32-NEXT:    bltu a1, a4, .LBB12_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a4
; RV32-NEXT:  .LBB12_2:
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v10
; RV32-NEXT:    vsetvli zero, a3, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v18, (a0), v24, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB12_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB12_4:
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv4r.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a5, a3, 1
; RV64-NEXT:    sub a6, a1, a5
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    li a4, 0
; RV64-NEXT:    li a2, 0
; RV64-NEXT:    bltu a1, a6, .LBB12_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a2, a6
; RV64-NEXT:  .LBB12_2:
; RV64-NEXT:    sub a6, a2, a3
; RV64-NEXT:    mv a7, a4
; RV64-NEXT:    bltu a2, a6, .LBB12_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a7, a6
; RV64-NEXT:  .LBB12_4:
; RV64-NEXT:    srli a6, a3, 2
; RV64-NEXT:    vsetvli t0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vslidedown.vx v13, v12, a6
; RV64-NEXT:    srli a6, a3, 3
; RV64-NEXT:    vsetvli t0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v13, a6
; RV64-NEXT:    vsetvli t0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v11
; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v19, (a0), v24, v0.t
; RV64-NEXT:    bltu a1, a5, .LBB12_6
; RV64-NEXT:  # %bb.5:
; RV64-NEXT:    mv a1, a5
; RV64-NEXT:  .LBB12_6:
; RV64-NEXT:    sub a5, a1, a3
; RV64-NEXT:    bltu a1, a5, .LBB12_8
; RV64-NEXT:  # %bb.7:
; RV64-NEXT:    mv a4, a5
; RV64-NEXT:  .LBB12_8:
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v12, a6
; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v9
; RV64-NEXT:    vsetvli zero, a4, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v17, (a0), v24, v0.t
; RV64-NEXT:    bltu a1, a3, .LBB12_10
; RV64-NEXT:  # %bb.9:
; RV64-NEXT:    mv a1, a3
; RV64-NEXT:  .LBB12_10:
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT:    bltu a2, a3, .LBB12_12
; RV64-NEXT:  # %bb.11:
; RV64-NEXT:    mv a2, a3
; RV64-NEXT:  .LBB12_12:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v10
; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
; RV64-NEXT:    vmv1r.v v0, v13
; RV64-NEXT:    vluxei64.v v18, (a0), v24, v0.t
; RV64-NEXT:    vmv4r.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @vpgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x i16*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x i32*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x i64*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i64> %v
}

declare <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

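; On RV32 the i64 index vector is truncated back to i32 (vncvt.x.x.w) so that
; vluxei32 can still be used.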
1052define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1053; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
1054; RV32:       # %bb.0:
1055; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1056; RV32-NEXT:    vsext.vf8 v16, v8
1057; RV32-NEXT:    vsll.vi v8, v16, 3
1058; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1059; RV32-NEXT:    vncvt.x.x.w v16, v8
1060; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1061; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1062; RV32-NEXT:    ret
1063;
1064; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
1065; RV64:       # %bb.0:
1066; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1067; RV64-NEXT:    vsext.vf8 v16, v8
1068; RV64-NEXT:    vsll.vi v8, v16, 3
1069; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1070; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1071; RV64-NEXT:    ret
1072  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1073  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
1074  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1075  ret <vscale x 8 x i64> %v
1076}
1077
1078define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1079; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
1080; RV32:       # %bb.0:
1081; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1082; RV32-NEXT:    vzext.vf8 v16, v8
1083; RV32-NEXT:    vsll.vi v8, v16, 3
1084; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1085; RV32-NEXT:    vncvt.x.x.w v16, v8
1086; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1087; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1088; RV32-NEXT:    ret
1089;
1090; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
1091; RV64:       # %bb.0:
1092; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1093; RV64-NEXT:    vzext.vf8 v16, v8
1094; RV64-NEXT:    vsll.vi v8, v16, 3
1095; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1096; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1097; RV64-NEXT:    ret
1098  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
1099  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
1100  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1101  ret <vscale x 8 x i64> %v
1102}
1103
1104define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1105; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
1106; RV32:       # %bb.0:
1107; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1108; RV32-NEXT:    vsext.vf2 v12, v8
1109; RV32-NEXT:    vsll.vi v16, v12, 3
1110; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1111; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1112; RV32-NEXT:    ret
1113;
1114; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
1115; RV64:       # %bb.0:
1116; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1117; RV64-NEXT:    vsext.vf4 v16, v8
1118; RV64-NEXT:    vsll.vi v8, v16, 3
1119; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1120; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1121; RV64-NEXT:    ret
1122  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
1123  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1124  ret <vscale x 8 x i64> %v
1125}
1126
1127define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1128; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
1129; RV32:       # %bb.0:
1130; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1131; RV32-NEXT:    vsext.vf4 v16, v8
1132; RV32-NEXT:    vsll.vi v8, v16, 3
1133; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1134; RV32-NEXT:    vncvt.x.x.w v16, v8
1135; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1136; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1137; RV32-NEXT:    ret
1138;
1139; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
1140; RV64:       # %bb.0:
1141; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1142; RV64-NEXT:    vsext.vf4 v16, v8
1143; RV64-NEXT:    vsll.vi v8, v16, 3
1144; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1145; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1146; RV64-NEXT:    ret
1147  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1148  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
1149  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1150  ret <vscale x 8 x i64> %v
1151}
1152
1153define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1154; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
1155; RV32:       # %bb.0:
1156; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1157; RV32-NEXT:    vzext.vf4 v16, v8
1158; RV32-NEXT:    vsll.vi v8, v16, 3
1159; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1160; RV32-NEXT:    vncvt.x.x.w v16, v8
1161; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
1162; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1163; RV32-NEXT:    ret
1164;
1165; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
1166; RV64:       # %bb.0:
1167; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1168; RV64-NEXT:    vzext.vf4 v16, v8
1169; RV64-NEXT:    vsll.vi v8, v16, 3
1170; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1171; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1172; RV64-NEXT:    ret
1173  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
1174  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
1175  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1176  ret <vscale x 8 x i64> %v
1177}
1178
1179define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1180; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
1181; RV32:       # %bb.0:
1182; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
1183; RV32-NEXT:    vsll.vi v16, v8, 3
1184; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1185; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1186; RV32-NEXT:    ret
1187;
1188; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
1189; RV64:       # %bb.0:
1190; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1191; RV64-NEXT:    vsext.vf2 v16, v8
1192; RV64-NEXT:    vsll.vi v8, v16, 3
1193; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
1194; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1195; RV64-NEXT:    ret
1196  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
1197  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1198  ret <vscale x 8 x i64> %v
1199}
1200
define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

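; The floating-point gathers below follow the same indexed-load lowering as
; the integer cases; only the element width selected by vsetvli differs.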
declare <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

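; An all-true mask is recognized, so the gather is emitted unmasked (no v0.t
; operand).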
define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x half*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

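; For e16 elements the offsets are scaled by 2 with vadd.vv on the widened
; indices (doubling) rather than a shift.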
define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

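; For f32 on RV32 the pointer EEW matches the data EEW, so the result may
; overwrite the index register in place and no vmv is needed.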
declare <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x float*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

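; For f64 the roles flip: RV64 gathers in place since pointer and data EEW
; are both 64 bits, while RV32 needs a register move after the load.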
declare <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vpgather_nxv1f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x double*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x double> %v
}

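; nxv6 is not a power of two; it is allocated in the containing nxv8
; (LMUL=8) register group, so the code matches the nxv8f64 versions below.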
declare <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*>, <vscale x 6 x i1>, i32)

define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i8> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i16> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i32> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(double* %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

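; nxv16f64 does not fit in a single LMUL=8 register group, so the gather is
; split in two: the EVL is clamped against vlenb and the upper mask half is
; extracted with vslidedown.vx.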
2337declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 16 x i1>, i32)
2338
2339define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2340; RV32-LABEL: vpgather_nxv16f64:
2341; RV32:       # %bb.0:
2342; RV32-NEXT:    vmv1r.v v24, v0
2343; RV32-NEXT:    li a2, 0
2344; RV32-NEXT:    csrr a1, vlenb
2345; RV32-NEXT:    srli a4, a1, 3
2346; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
2347; RV32-NEXT:    sub a3, a0, a1
2348; RV32-NEXT:    vslidedown.vx v0, v0, a4
2349; RV32-NEXT:    bltu a0, a3, .LBB102_2
2350; RV32-NEXT:  # %bb.1:
2351; RV32-NEXT:    mv a2, a3
2352; RV32-NEXT:  .LBB102_2:
2353; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
2354; RV32-NEXT:    vluxei32.v v16, (zero), v12, v0.t
2355; RV32-NEXT:    bltu a0, a1, .LBB102_4
2356; RV32-NEXT:  # %bb.3:
2357; RV32-NEXT:    mv a0, a1
2358; RV32-NEXT:  .LBB102_4:
2359; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
2360; RV32-NEXT:    vmv1r.v v0, v24
2361; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
2362; RV32-NEXT:    vmv.v.v v8, v24
2363; RV32-NEXT:    ret
2364;
2365; RV64-LABEL: vpgather_nxv16f64:
2366; RV64:       # %bb.0:
2367; RV64-NEXT:    vmv1r.v v24, v0
2368; RV64-NEXT:    li a2, 0
2369; RV64-NEXT:    csrr a1, vlenb
2370; RV64-NEXT:    srli a4, a1, 3
2371; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
2372; RV64-NEXT:    sub a3, a0, a1
2373; RV64-NEXT:    vslidedown.vx v0, v0, a4
2374; RV64-NEXT:    bltu a0, a3, .LBB102_2
2375; RV64-NEXT:  # %bb.1:
2376; RV64-NEXT:    mv a2, a3
2377; RV64-NEXT:  .LBB102_2:
2378; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
2379; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
2380; RV64-NEXT:    bltu a0, a1, .LBB102_4
2381; RV64-NEXT:  # %bb.3:
2382; RV64-NEXT:    mv a0, a1
2383; RV64-NEXT:  .LBB102_4:
2384; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
2385; RV64-NEXT:    vmv1r.v v0, v24
2386; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
2387; RV64-NEXT:    ret
2388  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2389  ret <vscale x 16 x double> %v
2390}
2391
2392define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2393; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
2394; RV32:       # %bb.0:
2395; RV32-NEXT:    vmv1r.v v12, v0
2396; RV32-NEXT:    li a3, 0
2397; RV32-NEXT:    csrr a2, vlenb
2398; RV32-NEXT:    srli a5, a2, 3
2399; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
2400; RV32-NEXT:    sub a4, a1, a2
2401; RV32-NEXT:    vslidedown.vx v0, v0, a5
2402; RV32-NEXT:    bltu a1, a4, .LBB103_2
2403; RV32-NEXT:  # %bb.1:
2404; RV32-NEXT:    mv a3, a4
2405; RV32-NEXT:  .LBB103_2:
2406; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
2407; RV32-NEXT:    vsext.vf2 v16, v8
2408; RV32-NEXT:    vsll.vi v24, v16, 3
2409; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2410; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
2411; RV32-NEXT:    bltu a1, a2, .LBB103_4
2412; RV32-NEXT:  # %bb.3:
2413; RV32-NEXT:    mv a1, a2
2414; RV32-NEXT:  .LBB103_4:
2415; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
2416; RV32-NEXT:    vmv1r.v v0, v12
2417; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
2418; RV32-NEXT:    ret
2419;
2420; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
2421; RV64:       # %bb.0:
2422; RV64-NEXT:    vmv1r.v v12, v0
2423; RV64-NEXT:    li a3, 0
2424; RV64-NEXT:    csrr a2, vlenb
2425; RV64-NEXT:    srli a5, a2, 3
2426; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
2427; RV64-NEXT:    sub a4, a1, a2
2428; RV64-NEXT:    vslidedown.vx v0, v0, a5
2429; RV64-NEXT:    bltu a1, a4, .LBB103_2
2430; RV64-NEXT:  # %bb.1:
2431; RV64-NEXT:    mv a3, a4
2432; RV64-NEXT:  .LBB103_2:
2433; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
2434; RV64-NEXT:    vsext.vf4 v16, v10
2435; RV64-NEXT:    vsll.vi v16, v16, 3
2436; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
2437; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2438; RV64-NEXT:    bltu a1, a2, .LBB103_4
2439; RV64-NEXT:  # %bb.3:
2440; RV64-NEXT:    mv a1, a2
2441; RV64-NEXT:  .LBB103_4:
2442; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
2443; RV64-NEXT:    vsext.vf4 v24, v8
2444; RV64-NEXT:    vsll.vi v24, v24, 3
2445; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
2446; RV64-NEXT:    vmv1r.v v0, v12
2447; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
2448; RV64-NEXT:    ret
2449  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
2450  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2451  ret <vscale x 16 x double> %v
2452}
2453
define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    mv a3, a1
; RV32-NEXT:    bltu a1, a2, .LBB104_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a2
; RV32-NEXT:  .LBB104_2:
; RV32-NEXT:    li a4, 0
; RV32-NEXT:    vsext.vf4 v24, v10
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a3, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    srli a3, a2, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT:    sub a2, a1, a2
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    bltu a1, a2, .LBB104_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a4, a2
; RV32-NEXT:  .LBB104_4:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    li a3, 0
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    srli a5, a2, 3
; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT:    sub a4, a1, a2
; RV64-NEXT:    vslidedown.vx v0, v0, a5
; RV64-NEXT:    bltu a1, a4, .LBB104_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a4
; RV64-NEXT:  .LBB104_2:
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a2, .LBB104_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB104_4:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

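; Same as the sext case above, but with the indices zero-extended to i64 in
; the IR; the expected lowering differs only in using vzext.vf4 in place of
; vsext.vf4.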
define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vzext.vf4 v16, v8
; RV32-NEXT:    mv a3, a1
; RV32-NEXT:    bltu a1, a2, .LBB105_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a2
; RV32-NEXT:  .LBB105_2:
; RV32-NEXT:    li a4, 0
; RV32-NEXT:    vzext.vf4 v24, v10
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a3, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    srli a3, a2, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT:    sub a2, a1, a2
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    bltu a1, a2, .LBB105_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a4, a2
; RV32-NEXT:  .LBB105_4:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    li a3, 0
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v10
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    srli a5, a2, 3
; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
; RV64-NEXT:    sub a4, a1, a2
; RV64-NEXT:    vslidedown.vx v0, v0, a5
; RV64-NEXT:    bltu a1, a4, .LBB105_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a4
; RV64-NEXT:  .LBB105_2:
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a2, .LBB105_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB105_4:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
