; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i16> @vpgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @vpgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x i8*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

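; The nxv32i8 gather below cannot be lowered as a single indexed load: once the
; indices are extended to pointer width the index vector no longer fits in an
; LMUL=8 register group, so codegen splits the gather and divides the EVL
; between the pieces.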
declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, <vscale x 32 x i1>, i32)

define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    srli a5, a2, 2
; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, mu
; RV32-NEXT:    slli a2, a2, 1
; RV32-NEXT:    sub a4, a1, a2
; RV32-NEXT:    vslidedown.vx v0, v0, a5
; RV32-NEXT:    bltu a1, a4, .LBB12_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a4
; RV32-NEXT:  .LBB12_2:
; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v10
; RV32-NEXT:    vsetvli zero, a3, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v18, (a0), v24, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB12_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB12_4:
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv4r.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a5, a3, 1
; RV64-NEXT:    sub a6, a1, a5
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    li a4, 0
; RV64-NEXT:    li a2, 0
; RV64-NEXT:    bltu a1, a6, .LBB12_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a2, a6
; RV64-NEXT:  .LBB12_2:
; RV64-NEXT:    sub a6, a2, a3
; RV64-NEXT:    mv a7, a4
; RV64-NEXT:    bltu a2, a6, .LBB12_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a7, a6
; RV64-NEXT:  .LBB12_4:
; RV64-NEXT:    srli a6, a3, 2
; RV64-NEXT:    vsetvli t0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vslidedown.vx v13, v12, a6
; RV64-NEXT:    srli a6, a3, 3
; RV64-NEXT:    vsetvli t0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v13, a6
; RV64-NEXT:    vsetvli t0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v11
; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v19, (a0), v24, v0.t
; RV64-NEXT:    bltu a1, a5, .LBB12_6
; RV64-NEXT:  # %bb.5:
; RV64-NEXT:    mv a1, a5
; RV64-NEXT:  .LBB12_6:
; RV64-NEXT:    sub a5, a1, a3
; RV64-NEXT:    bltu a1, a5, .LBB12_8
; RV64-NEXT:  # %bb.7:
; RV64-NEXT:    mv a4, a5
; RV64-NEXT:  .LBB12_8:
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v12, a6
; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v9
; RV64-NEXT:    vsetvli zero, a4, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v17, (a0), v24, v0.t
; RV64-NEXT:    bltu a1, a3, .LBB12_10
; RV64-NEXT:  # %bb.9:
; RV64-NEXT:    mv a1, a3
; RV64-NEXT:  .LBB12_10:
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vluxei64.v v16, (a0), v24, v0.t
; RV64-NEXT:    bltu a2, a3, .LBB12_12
; RV64-NEXT:  # %bb.11:
; RV64-NEXT:    mv a2, a3
; RV64-NEXT:  .LBB12_12:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v10
; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
; RV64-NEXT:    vmv1r.v v0, v13
; RV64-NEXT:    vluxei64.v v18, (a0), v24, v0.t
; RV64-NEXT:    vmv4r.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @vpgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x i16*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i16> %v
}

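; In the baseidx forms of the i16-element gathers below, the narrow indices are
; first extended to the index EEW and then doubled with vadd.vv to form byte
; offsets into the base pointer.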
declare <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x i32*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x i64*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x i64> %v
}

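; In the i64-element baseidx tests that extend their indices to i64 in the IR,
; RV32 narrows the computed 64-bit byte offsets back to 32 bits with
; vncvt.x.x.w so that vluxei32 can still be used.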
declare <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

declare <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x half*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
1411; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1412; RV64-NEXT: vsext.vf8 v16, v8 1413; RV64-NEXT: vadd.vv v16, v16, v16 1414; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1415; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1416; RV64-NEXT: ret 1417 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1418 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1419 %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1420 ret <vscale x 8 x half> %v 1421} 1422 1423define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1424; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: 1425; RV32: # %bb.0: 1426; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1427; RV32-NEXT: vzext.vf4 v12, v8 1428; RV32-NEXT: vadd.vv v12, v12, v12 1429; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1430; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1431; RV32-NEXT: ret 1432; 1433; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16: 1434; RV64: # %bb.0: 1435; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1436; RV64-NEXT: vzext.vf8 v16, v8 1437; RV64-NEXT: vadd.vv v16, v16, v16 1438; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1439; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1440; RV64-NEXT: ret 1441 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1442 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1443 %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1444 ret <vscale x 8 x half> %v 1445} 1446 1447define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1448; RV32-LABEL: vpgather_baseidx_nxv8f16: 1449; RV32: # %bb.0: 1450; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1451; RV32-NEXT: vsext.vf2 v12, v8 1452; RV32-NEXT: vadd.vv v12, v12, v12 1453; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1454; RV32-NEXT: vluxei32.v v8, (a0), v12, v0.t 1455; RV32-NEXT: ret 1456; 1457; RV64-LABEL: vpgather_baseidx_nxv8f16: 1458; RV64: # %bb.0: 1459; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1460; RV64-NEXT: vsext.vf4 v16, v8 1461; RV64-NEXT: vadd.vv v16, v16, v16 1462; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1463; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1464; RV64-NEXT: ret 1465 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs 1466 %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1467 ret <vscale x 8 x half> %v 1468} 1469 1470declare <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*>, <vscale x 1 x i1>, i32) 1471 1472define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1473; RV32-LABEL: vpgather_nxv1f32: 1474; RV32: # %bb.0: 1475; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 1476; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t 1477; RV32-NEXT: ret 1478; 1479; RV64-LABEL: vpgather_nxv1f32: 1480; RV64: # %bb.0: 1481; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 1482; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t 1483; RV64-NEXT: vmv1r.v v8, v9 1484; RV64-NEXT: ret 1485 %v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 %evl) 1486 ret <vscale x 1 x float> %v 1487} 1488 1489declare <vscale x 2 x float> 
@llvm.vp.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, <vscale x 2 x i1>, i32) 1490 1491define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1492; RV32-LABEL: vpgather_nxv2f32: 1493; RV32: # %bb.0: 1494; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1495; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t 1496; RV32-NEXT: ret 1497; 1498; RV64-LABEL: vpgather_nxv2f32: 1499; RV64: # %bb.0: 1500; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1501; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t 1502; RV64-NEXT: vmv.v.v v8, v10 1503; RV64-NEXT: ret 1504 %v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 %evl) 1505 ret <vscale x 2 x float> %v 1506} 1507 1508declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*>, <vscale x 4 x i1>, i32) 1509 1510define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1511; RV32-LABEL: vpgather_nxv4f32: 1512; RV32: # %bb.0: 1513; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1514; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t 1515; RV32-NEXT: ret 1516; 1517; RV64-LABEL: vpgather_nxv4f32: 1518; RV64: # %bb.0: 1519; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1520; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t 1521; RV64-NEXT: vmv.v.v v8, v12 1522; RV64-NEXT: ret 1523 %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 %evl) 1524 ret <vscale x 4 x float> %v 1525} 1526 1527define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x float*> %ptrs, i32 zeroext %evl) { 1528; RV32-LABEL: vpgather_truemask_nxv4f32: 1529; RV32: # %bb.0: 1530; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1531; RV32-NEXT: vluxei32.v v8, (zero), v8 1532; RV32-NEXT: ret 1533; 1534; RV64-LABEL: vpgather_truemask_nxv4f32: 1535; RV64: # %bb.0: 1536; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1537; RV64-NEXT: vluxei64.v v12, (zero), v8 1538; RV64-NEXT: vmv.v.v v8, v12 1539; RV64-NEXT: ret 1540 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1541 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1542 %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl) 1543 ret <vscale x 4 x float> %v 1544} 1545 1546declare <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*>, <vscale x 8 x i1>, i32) 1547 1548define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1549; RV32-LABEL: vpgather_nxv8f32: 1550; RV32: # %bb.0: 1551; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, mu 1552; RV32-NEXT: vluxei32.v v8, (zero), v8, v0.t 1553; RV32-NEXT: ret 1554; 1555; RV64-LABEL: vpgather_nxv8f32: 1556; RV64: # %bb.0: 1557; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, mu 1558; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t 1559; RV64-NEXT: vmv.v.v v8, v16 1560; RV64-NEXT: ret 1561 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1562 ret <vscale x 8 x float> %v 1563} 1564 1565define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1566; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32: 1567; RV32: # %bb.0: 1568; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1569; 
RV32-NEXT: vsext.vf4 v12, v8 1570; RV32-NEXT: vsll.vi v8, v12, 2 1571; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1572; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1573; RV32-NEXT: ret 1574; 1575; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32: 1576; RV64: # %bb.0: 1577; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1578; RV64-NEXT: vsext.vf8 v16, v8 1579; RV64-NEXT: vsll.vi v16, v16, 2 1580; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1581; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1582; RV64-NEXT: ret 1583 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs 1584 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1585 ret <vscale x 8 x float> %v 1586} 1587 1588define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1589; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32: 1590; RV32: # %bb.0: 1591; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1592; RV32-NEXT: vsext.vf4 v12, v8 1593; RV32-NEXT: vsll.vi v8, v12, 2 1594; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1595; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1596; RV32-NEXT: ret 1597; 1598; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32: 1599; RV64: # %bb.0: 1600; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1601; RV64-NEXT: vsext.vf8 v16, v8 1602; RV64-NEXT: vsll.vi v16, v16, 2 1603; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1604; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1605; RV64-NEXT: ret 1606 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1607 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1608 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1609 ret <vscale x 8 x float> %v 1610} 1611 1612define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1613; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: 1614; RV32: # %bb.0: 1615; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1616; RV32-NEXT: vzext.vf4 v12, v8 1617; RV32-NEXT: vsll.vi v8, v12, 2 1618; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1619; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1620; RV32-NEXT: ret 1621; 1622; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32: 1623; RV64: # %bb.0: 1624; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1625; RV64-NEXT: vzext.vf8 v16, v8 1626; RV64-NEXT: vsll.vi v16, v16, 2 1627; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1628; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1629; RV64-NEXT: ret 1630 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1631 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1632 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1633 ret <vscale x 8 x float> %v 1634} 1635 1636define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1637; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: 1638; RV32: # %bb.0: 1639; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1640; RV32-NEXT: vsext.vf2 v12, v8 1641; RV32-NEXT: vsll.vi v8, v12, 2 1642; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1643; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1644; RV32-NEXT: ret 1645; 1646; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32: 1647; RV64: # %bb.0: 1648; RV64-NEXT: 
vsetvli a2, zero, e64, m8, ta, mu 1649; RV64-NEXT: vsext.vf4 v16, v8 1650; RV64-NEXT: vsll.vi v16, v16, 2 1651; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1652; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1653; RV64-NEXT: ret 1654 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs 1655 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1656 ret <vscale x 8 x float> %v 1657} 1658 1659define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1660; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: 1661; RV32: # %bb.0: 1662; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1663; RV32-NEXT: vsext.vf2 v12, v8 1664; RV32-NEXT: vsll.vi v8, v12, 2 1665; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1666; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1667; RV32-NEXT: ret 1668; 1669; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32: 1670; RV64: # %bb.0: 1671; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1672; RV64-NEXT: vsext.vf4 v16, v8 1673; RV64-NEXT: vsll.vi v16, v16, 2 1674; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1675; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1676; RV64-NEXT: ret 1677 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1678 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1679 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1680 ret <vscale x 8 x float> %v 1681} 1682 1683define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1684; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: 1685; RV32: # %bb.0: 1686; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1687; RV32-NEXT: vzext.vf2 v12, v8 1688; RV32-NEXT: vsll.vi v8, v12, 2 1689; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1690; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1691; RV32-NEXT: ret 1692; 1693; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32: 1694; RV64: # %bb.0: 1695; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1696; RV64-NEXT: vzext.vf4 v16, v8 1697; RV64-NEXT: vsll.vi v16, v16, 2 1698; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1699; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1700; RV64-NEXT: ret 1701 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1702 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1703 %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1704 ret <vscale x 8 x float> %v 1705} 1706 1707define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1708; RV32-LABEL: vpgather_baseidx_nxv8f32: 1709; RV32: # %bb.0: 1710; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1711; RV32-NEXT: vsll.vi v8, v8, 2 1712; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1713; RV32-NEXT: vluxei32.v v8, (a0), v8, v0.t 1714; RV32-NEXT: ret 1715; 1716; RV64-LABEL: vpgather_baseidx_nxv8f32: 1717; RV64: # %bb.0: 1718; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1719; RV64-NEXT: vsext.vf2 v16, v8 1720; RV64-NEXT: vsll.vi v16, v16, 2 1721; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1722; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t 1723; RV64-NEXT: ret 1724 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs 1725 %v = call 
<vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1726 ret <vscale x 8 x float> %v 1727} 1728 1729declare <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, <vscale x 1 x i1>, i32) 1730 1731define <vscale x 1 x double> @vpgather_nxv1f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1732; RV32-LABEL: vpgather_nxv1f64: 1733; RV32: # %bb.0: 1734; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, mu 1735; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1736; RV32-NEXT: vmv.v.v v8, v9 1737; RV32-NEXT: ret 1738; 1739; RV64-LABEL: vpgather_nxv1f64: 1740; RV64: # %bb.0: 1741; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, mu 1742; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1743; RV64-NEXT: ret 1744 %v = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 %evl) 1745 ret <vscale x 1 x double> %v 1746} 1747 1748declare <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, <vscale x 2 x i1>, i32) 1749 1750define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1751; RV32-LABEL: vpgather_nxv2f64: 1752; RV32: # %bb.0: 1753; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, mu 1754; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 1755; RV32-NEXT: vmv.v.v v8, v10 1756; RV32-NEXT: ret 1757; 1758; RV64-LABEL: vpgather_nxv2f64: 1759; RV64: # %bb.0: 1760; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, mu 1761; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1762; RV64-NEXT: ret 1763 %v = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 %evl) 1764 ret <vscale x 2 x double> %v 1765} 1766 1767declare <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, <vscale x 4 x i1>, i32) 1768 1769define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1770; RV32-LABEL: vpgather_nxv4f64: 1771; RV32: # %bb.0: 1772; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1773; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 1774; RV32-NEXT: vmv.v.v v8, v12 1775; RV32-NEXT: ret 1776; 1777; RV64-LABEL: vpgather_nxv4f64: 1778; RV64: # %bb.0: 1779; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1780; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1781; RV64-NEXT: ret 1782 %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 %evl) 1783 ret <vscale x 4 x double> %v 1784} 1785 1786define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x double*> %ptrs, i32 zeroext %evl) { 1787; RV32-LABEL: vpgather_truemask_nxv4f64: 1788; RV32: # %bb.0: 1789; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1790; RV32-NEXT: vluxei32.v v12, (zero), v8 1791; RV32-NEXT: vmv.v.v v8, v12 1792; RV32-NEXT: ret 1793; 1794; RV64-LABEL: vpgather_truemask_nxv4f64: 1795; RV64: # %bb.0: 1796; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1797; RV64-NEXT: vluxei64.v v8, (zero), v8 1798; RV64-NEXT: ret 1799 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1800 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1801 %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl) 1802 ret <vscale x 4 x double> %v 1803} 1804 1805declare <vscale x 6 x double> 
@llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*>, <vscale x 6 x i1>, i32) 1806 1807define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1808; RV32-LABEL: vpgather_nxv6f64: 1809; RV32: # %bb.0: 1810; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1811; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 1812; RV32-NEXT: vmv.v.v v8, v16 1813; RV32-NEXT: ret 1814; 1815; RV64-LABEL: vpgather_nxv6f64: 1816; RV64: # %bb.0: 1817; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1818; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 1819; RV64-NEXT: ret 1820 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1821 ret <vscale x 6 x double> %v 1822} 1823 1824define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1825; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64: 1826; RV32: # %bb.0: 1827; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1828; RV32-NEXT: vsext.vf4 v12, v8 1829; RV32-NEXT: vsll.vi v16, v12, 3 1830; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1831; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1832; RV32-NEXT: ret 1833; 1834; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64: 1835; RV64: # %bb.0: 1836; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1837; RV64-NEXT: vsext.vf8 v16, v8 1838; RV64-NEXT: vsll.vi v8, v16, 3 1839; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1840; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1841; RV64-NEXT: ret 1842 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i8> %idxs 1843 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1844 ret <vscale x 6 x double> %v 1845} 1846 1847define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1848; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: 1849; RV32: # %bb.0: 1850; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1851; RV32-NEXT: vsext.vf8 v16, v8 1852; RV32-NEXT: vsll.vi v8, v16, 3 1853; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1854; RV32-NEXT: vncvt.x.x.w v16, v8 1855; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1856; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1857; RV32-NEXT: ret 1858; 1859; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64: 1860; RV64: # %bb.0: 1861; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1862; RV64-NEXT: vsext.vf8 v16, v8 1863; RV64-NEXT: vsll.vi v8, v16, 3 1864; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1865; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1866; RV64-NEXT: ret 1867 %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64> 1868 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1869 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1870 ret <vscale x 6 x double> %v 1871} 1872 1873define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1874; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: 1875; RV32: # %bb.0: 1876; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1877; RV32-NEXT: vzext.vf8 v16, v8 1878; RV32-NEXT: vsll.vi v8, v16, 3 1879; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1880; RV32-NEXT: vncvt.x.x.w v16, v8 1881; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1882; RV32-NEXT: 
vluxei32.v v8, (a0), v16, v0.t 1883; RV32-NEXT: ret 1884; 1885; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64: 1886; RV64: # %bb.0: 1887; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1888; RV64-NEXT: vzext.vf8 v16, v8 1889; RV64-NEXT: vsll.vi v8, v16, 3 1890; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1891; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1892; RV64-NEXT: ret 1893 %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64> 1894 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1895 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1896 ret <vscale x 6 x double> %v 1897} 1898 1899define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1900; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: 1901; RV32: # %bb.0: 1902; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1903; RV32-NEXT: vsext.vf2 v12, v8 1904; RV32-NEXT: vsll.vi v16, v12, 3 1905; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1906; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1907; RV32-NEXT: ret 1908; 1909; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64: 1910; RV64: # %bb.0: 1911; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1912; RV64-NEXT: vsext.vf4 v16, v8 1913; RV64-NEXT: vsll.vi v8, v16, 3 1914; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1915; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1916; RV64-NEXT: ret 1917 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i16> %idxs 1918 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1919 ret <vscale x 6 x double> %v 1920} 1921 1922define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1923; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: 1924; RV32: # %bb.0: 1925; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1926; RV32-NEXT: vsext.vf4 v16, v8 1927; RV32-NEXT: vsll.vi v8, v16, 3 1928; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1929; RV32-NEXT: vncvt.x.x.w v16, v8 1930; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1931; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1932; RV32-NEXT: ret 1933; 1934; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64: 1935; RV64: # %bb.0: 1936; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1937; RV64-NEXT: vsext.vf4 v16, v8 1938; RV64-NEXT: vsll.vi v8, v16, 3 1939; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1940; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1941; RV64-NEXT: ret 1942 %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64> 1943 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1944 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1945 ret <vscale x 6 x double> %v 1946} 1947 1948define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1949; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: 1950; RV32: # %bb.0: 1951; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1952; RV32-NEXT: vzext.vf4 v16, v8 1953; RV32-NEXT: vsll.vi v8, v16, 3 1954; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1955; RV32-NEXT: vncvt.x.x.w v16, v8 1956; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1957; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1958; RV32-NEXT: ret 1959; 1960; 
RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64: 1961; RV64: # %bb.0: 1962; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1963; RV64-NEXT: vzext.vf4 v16, v8 1964; RV64-NEXT: vsll.vi v8, v16, 3 1965; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1966; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1967; RV64-NEXT: ret 1968 %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64> 1969 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1970 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1971 ret <vscale x 6 x double> %v 1972} 1973 1974define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1975; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: 1976; RV32: # %bb.0: 1977; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1978; RV32-NEXT: vsll.vi v16, v8, 3 1979; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1980; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 1981; RV32-NEXT: ret 1982; 1983; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64: 1984; RV64: # %bb.0: 1985; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1986; RV64-NEXT: vsext.vf2 v16, v8 1987; RV64-NEXT: vsll.vi v8, v16, 3 1988; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1989; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 1990; RV64-NEXT: ret 1991 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i32> %idxs 1992 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1993 ret <vscale x 6 x double> %v 1994} 1995 1996define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1997; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: 1998; RV32: # %bb.0: 1999; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2000; RV32-NEXT: vsext.vf2 v16, v8 2001; RV32-NEXT: vsll.vi v8, v16, 3 2002; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2003; RV32-NEXT: vncvt.x.x.w v16, v8 2004; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2005; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2006; RV32-NEXT: ret 2007; 2008; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64: 2009; RV64: # %bb.0: 2010; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2011; RV64-NEXT: vsext.vf2 v16, v8 2012; RV64-NEXT: vsll.vi v8, v16, 3 2013; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2014; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2015; RV64-NEXT: ret 2016 %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64> 2017 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 2018 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 2019 ret <vscale x 6 x double> %v 2020} 2021 2022define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 2023; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: 2024; RV32: # %bb.0: 2025; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2026; RV32-NEXT: vzext.vf2 v16, v8 2027; RV32-NEXT: vsll.vi v8, v16, 3 2028; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2029; RV32-NEXT: vncvt.x.x.w v16, v8 2030; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2031; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2032; RV32-NEXT: ret 2033; 2034; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64: 2035; RV64: # %bb.0: 2036; RV64-NEXT: vsetvli a2, 
zero, e64, m8, ta, mu 2037; RV64-NEXT: vzext.vf2 v16, v8 2038; RV64-NEXT: vsll.vi v8, v16, 3 2039; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2040; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2041; RV64-NEXT: ret 2042 %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64> 2043 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 2044 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 2045 ret <vscale x 6 x double> %v 2046} 2047 2048define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(double* %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 2049; RV32-LABEL: vpgather_baseidx_nxv6f64: 2050; RV32: # %bb.0: 2051; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2052; RV32-NEXT: vsll.vi v8, v8, 3 2053; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2054; RV32-NEXT: vncvt.x.x.w v16, v8 2055; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2056; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2057; RV32-NEXT: ret 2058; 2059; RV64-LABEL: vpgather_baseidx_nxv6f64: 2060; RV64: # %bb.0: 2061; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2062; RV64-NEXT: vsll.vi v8, v8, 3 2063; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2064; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2065; RV64-NEXT: ret 2066 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %idxs 2067 %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0f64(<vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 2068 ret <vscale x 6 x double> %v 2069} 2070 2071declare <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, <vscale x 8 x i1>, i32) 2072 2073define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2074; RV32-LABEL: vpgather_nxv8f64: 2075; RV32: # %bb.0: 2076; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu 2077; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 2078; RV32-NEXT: vmv.v.v v8, v16 2079; RV32-NEXT: ret 2080; 2081; RV64-LABEL: vpgather_nxv8f64: 2082; RV64: # %bb.0: 2083; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu 2084; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 2085; RV64-NEXT: ret 2086 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2087 ret <vscale x 8 x double> %v 2088} 2089 2090define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2091; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64: 2092; RV32: # %bb.0: 2093; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 2094; RV32-NEXT: vsext.vf4 v12, v8 2095; RV32-NEXT: vsll.vi v16, v12, 3 2096; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2097; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2098; RV32-NEXT: ret 2099; 2100; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64: 2101; RV64: # %bb.0: 2102; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2103; RV64-NEXT: vsext.vf8 v16, v8 2104; RV64-NEXT: vsll.vi v8, v16, 3 2105; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2106; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2107; RV64-NEXT: ret 2108 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs 2109 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2110 ret <vscale x 8 x double> %v 2111} 2112 2113define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, 
<vscale x 8 x i1> %m, i32 zeroext %evl) { 2114; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64: 2115; RV32: # %bb.0: 2116; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2117; RV32-NEXT: vsext.vf8 v16, v8 2118; RV32-NEXT: vsll.vi v8, v16, 3 2119; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2120; RV32-NEXT: vncvt.x.x.w v16, v8 2121; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2122; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2123; RV32-NEXT: ret 2124; 2125; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64: 2126; RV64: # %bb.0: 2127; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2128; RV64-NEXT: vsext.vf8 v16, v8 2129; RV64-NEXT: vsll.vi v8, v16, 3 2130; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2131; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2132; RV64-NEXT: ret 2133 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 2134 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2135 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2136 ret <vscale x 8 x double> %v 2137} 2138 2139define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2140; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: 2141; RV32: # %bb.0: 2142; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2143; RV32-NEXT: vzext.vf8 v16, v8 2144; RV32-NEXT: vsll.vi v8, v16, 3 2145; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2146; RV32-NEXT: vncvt.x.x.w v16, v8 2147; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2148; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2149; RV32-NEXT: ret 2150; 2151; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64: 2152; RV64: # %bb.0: 2153; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2154; RV64-NEXT: vzext.vf8 v16, v8 2155; RV64-NEXT: vsll.vi v8, v16, 3 2156; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2157; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2158; RV64-NEXT: ret 2159 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 2160 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2161 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2162 ret <vscale x 8 x double> %v 2163} 2164 2165define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2166; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64: 2167; RV32: # %bb.0: 2168; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 2169; RV32-NEXT: vsext.vf2 v12, v8 2170; RV32-NEXT: vsll.vi v16, v12, 3 2171; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2172; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2173; RV32-NEXT: ret 2174; 2175; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64: 2176; RV64: # %bb.0: 2177; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2178; RV64-NEXT: vsext.vf4 v16, v8 2179; RV64-NEXT: vsll.vi v8, v16, 3 2180; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2181; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2182; RV64-NEXT: ret 2183 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs 2184 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2185 ret <vscale x 8 x double> %v 2186} 2187 2188define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2189; RV32-LABEL: 
vpgather_baseidx_sext_nxv8i16_nxv8f64: 2190; RV32: # %bb.0: 2191; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2192; RV32-NEXT: vsext.vf4 v16, v8 2193; RV32-NEXT: vsll.vi v8, v16, 3 2194; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2195; RV32-NEXT: vncvt.x.x.w v16, v8 2196; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2197; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2198; RV32-NEXT: ret 2199; 2200; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64: 2201; RV64: # %bb.0: 2202; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2203; RV64-NEXT: vsext.vf4 v16, v8 2204; RV64-NEXT: vsll.vi v8, v16, 3 2205; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2206; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2207; RV64-NEXT: ret 2208 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 2209 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2210 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2211 ret <vscale x 8 x double> %v 2212} 2213 2214define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2215; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: 2216; RV32: # %bb.0: 2217; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2218; RV32-NEXT: vzext.vf4 v16, v8 2219; RV32-NEXT: vsll.vi v8, v16, 3 2220; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2221; RV32-NEXT: vncvt.x.x.w v16, v8 2222; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2223; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2224; RV32-NEXT: ret 2225; 2226; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64: 2227; RV64: # %bb.0: 2228; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2229; RV64-NEXT: vzext.vf4 v16, v8 2230; RV64-NEXT: vsll.vi v8, v16, 3 2231; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2232; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2233; RV64-NEXT: ret 2234 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 2235 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2236 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2237 ret <vscale x 8 x double> %v 2238} 2239 2240define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2241; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64: 2242; RV32: # %bb.0: 2243; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 2244; RV32-NEXT: vsll.vi v16, v8, 3 2245; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2246; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2247; RV32-NEXT: ret 2248; 2249; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64: 2250; RV64: # %bb.0: 2251; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2252; RV64-NEXT: vsext.vf2 v16, v8 2253; RV64-NEXT: vsll.vi v8, v16, 3 2254; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2255; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2256; RV64-NEXT: ret 2257 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs 2258 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2259 ret <vscale x 8 x double> %v 2260} 2261 2262define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2263; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: 2264; RV32: # %bb.0: 2265; RV32-NEXT: vsetvli a2, zero, e64, m8, 
ta, mu 2266; RV32-NEXT: vsext.vf2 v16, v8 2267; RV32-NEXT: vsll.vi v8, v16, 3 2268; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2269; RV32-NEXT: vncvt.x.x.w v16, v8 2270; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2271; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2272; RV32-NEXT: ret 2273; 2274; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64: 2275; RV64: # %bb.0: 2276; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2277; RV64-NEXT: vsext.vf2 v16, v8 2278; RV64-NEXT: vsll.vi v8, v16, 3 2279; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2280; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2281; RV64-NEXT: ret 2282 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 2283 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2284 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2285 ret <vscale x 8 x double> %v 2286} 2287 2288define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2289; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: 2290; RV32: # %bb.0: 2291; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2292; RV32-NEXT: vzext.vf2 v16, v8 2293; RV32-NEXT: vsll.vi v8, v16, 3 2294; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2295; RV32-NEXT: vncvt.x.x.w v16, v8 2296; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2297; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2298; RV32-NEXT: ret 2299; 2300; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64: 2301; RV64: # %bb.0: 2302; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2303; RV64-NEXT: vzext.vf2 v16, v8 2304; RV64-NEXT: vsll.vi v8, v16, 3 2305; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2306; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2307; RV64-NEXT: ret 2308 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 2309 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2310 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2311 ret <vscale x 8 x double> %v 2312} 2313 2314define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2315; RV32-LABEL: vpgather_baseidx_nxv8f64: 2316; RV32: # %bb.0: 2317; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2318; RV32-NEXT: vsll.vi v8, v8, 3 2319; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2320; RV32-NEXT: vncvt.x.x.w v16, v8 2321; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2322; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2323; RV32-NEXT: ret 2324; 2325; RV64-LABEL: vpgather_baseidx_nxv8f64: 2326; RV64: # %bb.0: 2327; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2328; RV64-NEXT: vsll.vi v8, v8, 3 2329; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2330; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t 2331; RV64-NEXT: ret 2332 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs 2333 %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2334 ret <vscale x 8 x double> %v 2335} 2336 2337declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 16 x i1>, i32) 2338 2339define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2340; RV32-LABEL: vpgather_nxv16f64: 2341; RV32: # %bb.0: 2342; RV32-NEXT: vmv1r.v v24, v0 2343; 
RV32-NEXT: li a2, 0 2344; RV32-NEXT: csrr a1, vlenb 2345; RV32-NEXT: srli a4, a1, 3 2346; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, mu 2347; RV32-NEXT: sub a3, a0, a1 2348; RV32-NEXT: vslidedown.vx v0, v0, a4 2349; RV32-NEXT: bltu a0, a3, .LBB102_2 2350; RV32-NEXT: # %bb.1: 2351; RV32-NEXT: mv a2, a3 2352; RV32-NEXT: .LBB102_2: 2353; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu 2354; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t 2355; RV32-NEXT: bltu a0, a1, .LBB102_4 2356; RV32-NEXT: # %bb.3: 2357; RV32-NEXT: mv a0, a1 2358; RV32-NEXT: .LBB102_4: 2359; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu 2360; RV32-NEXT: vmv1r.v v0, v24 2361; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t 2362; RV32-NEXT: vmv.v.v v8, v24 2363; RV32-NEXT: ret 2364; 2365; RV64-LABEL: vpgather_nxv16f64: 2366; RV64: # %bb.0: 2367; RV64-NEXT: vmv1r.v v24, v0 2368; RV64-NEXT: li a2, 0 2369; RV64-NEXT: csrr a1, vlenb 2370; RV64-NEXT: srli a4, a1, 3 2371; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu 2372; RV64-NEXT: sub a3, a0, a1 2373; RV64-NEXT: vslidedown.vx v0, v0, a4 2374; RV64-NEXT: bltu a0, a3, .LBB102_2 2375; RV64-NEXT: # %bb.1: 2376; RV64-NEXT: mv a2, a3 2377; RV64-NEXT: .LBB102_2: 2378; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, mu 2379; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t 2380; RV64-NEXT: bltu a0, a1, .LBB102_4 2381; RV64-NEXT: # %bb.3: 2382; RV64-NEXT: mv a0, a1 2383; RV64-NEXT: .LBB102_4: 2384; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu 2385; RV64-NEXT: vmv1r.v v0, v24 2386; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t 2387; RV64-NEXT: ret 2388 %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2389 ret <vscale x 16 x double> %v 2390} 2391 2392define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2393; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: 2394; RV32: # %bb.0: 2395; RV32-NEXT: vmv1r.v v12, v0 2396; RV32-NEXT: li a3, 0 2397; RV32-NEXT: csrr a2, vlenb 2398; RV32-NEXT: srli a5, a2, 3 2399; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu 2400; RV32-NEXT: sub a4, a1, a2 2401; RV32-NEXT: vslidedown.vx v0, v0, a5 2402; RV32-NEXT: bltu a1, a4, .LBB103_2 2403; RV32-NEXT: # %bb.1: 2404; RV32-NEXT: mv a3, a4 2405; RV32-NEXT: .LBB103_2: 2406; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, mu 2407; RV32-NEXT: vsext.vf2 v16, v8 2408; RV32-NEXT: vsll.vi v24, v16, 3 2409; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2410; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t 2411; RV32-NEXT: bltu a1, a2, .LBB103_4 2412; RV32-NEXT: # %bb.3: 2413; RV32-NEXT: mv a1, a2 2414; RV32-NEXT: .LBB103_4: 2415; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2416; RV32-NEXT: vmv1r.v v0, v12 2417; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t 2418; RV32-NEXT: ret 2419; 2420; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: 2421; RV64: # %bb.0: 2422; RV64-NEXT: vmv1r.v v12, v0 2423; RV64-NEXT: li a3, 0 2424; RV64-NEXT: csrr a2, vlenb 2425; RV64-NEXT: srli a5, a2, 3 2426; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu 2427; RV64-NEXT: sub a4, a1, a2 2428; RV64-NEXT: vslidedown.vx v0, v0, a5 2429; RV64-NEXT: bltu a1, a4, .LBB103_2 2430; RV64-NEXT: # %bb.1: 2431; RV64-NEXT: mv a3, a4 2432; RV64-NEXT: .LBB103_2: 2433; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu 2434; RV64-NEXT: vsext.vf4 v16, v10 2435; RV64-NEXT: vsll.vi v16, v16, 3 2436; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2437; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2438; RV64-NEXT: bltu a1, a2, .LBB103_4 2439; 
RV64-NEXT: # %bb.3: 2440; RV64-NEXT: mv a1, a2 2441; RV64-NEXT: .LBB103_4: 2442; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2443; RV64-NEXT: vsext.vf4 v24, v8 2444; RV64-NEXT: vsll.vi v24, v24, 3 2445; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2446; RV64-NEXT: vmv1r.v v0, v12 2447; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t 2448; RV64-NEXT: ret 2449 %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs 2450 %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2451 ret <vscale x 16 x double> %v 2452} 2453 2454define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2455; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: 2456; RV32: # %bb.0: 2457; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2458; RV32-NEXT: csrr a2, vlenb 2459; RV32-NEXT: vsext.vf4 v16, v8 2460; RV32-NEXT: mv a3, a1 2461; RV32-NEXT: bltu a1, a2, .LBB104_2 2462; RV32-NEXT: # %bb.1: 2463; RV32-NEXT: mv a3, a2 2464; RV32-NEXT: .LBB104_2: 2465; RV32-NEXT: li a4, 0 2466; RV32-NEXT: vsext.vf4 v24, v10 2467; RV32-NEXT: vsll.vi v8, v16, 3 2468; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu 2469; RV32-NEXT: vncvt.x.x.w v16, v8 2470; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2471; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2472; RV32-NEXT: srli a3, a2, 3 2473; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu 2474; RV32-NEXT: sub a2, a1, a2 2475; RV32-NEXT: vslidedown.vx v0, v0, a3 2476; RV32-NEXT: bltu a1, a2, .LBB104_4 2477; RV32-NEXT: # %bb.3: 2478; RV32-NEXT: mv a4, a2 2479; RV32-NEXT: .LBB104_4: 2480; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2481; RV32-NEXT: vsll.vi v16, v24, 3 2482; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu 2483; RV32-NEXT: vncvt.x.x.w v24, v16 2484; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2485; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2486; RV32-NEXT: ret 2487; 2488; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: 2489; RV64: # %bb.0: 2490; RV64-NEXT: vmv1r.v v12, v0 2491; RV64-NEXT: li a3, 0 2492; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2493; RV64-NEXT: vsext.vf4 v16, v10 2494; RV64-NEXT: csrr a2, vlenb 2495; RV64-NEXT: srli a5, a2, 3 2496; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu 2497; RV64-NEXT: sub a4, a1, a2 2498; RV64-NEXT: vslidedown.vx v0, v0, a5 2499; RV64-NEXT: bltu a1, a4, .LBB104_2 2500; RV64-NEXT: # %bb.1: 2501; RV64-NEXT: mv a3, a4 2502; RV64-NEXT: .LBB104_2: 2503; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu 2504; RV64-NEXT: vsext.vf4 v24, v8 2505; RV64-NEXT: vsll.vi v16, v16, 3 2506; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2507; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2508; RV64-NEXT: bltu a1, a2, .LBB104_4 2509; RV64-NEXT: # %bb.3: 2510; RV64-NEXT: mv a1, a2 2511; RV64-NEXT: .LBB104_4: 2512; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2513; RV64-NEXT: vsll.vi v24, v24, 3 2514; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2515; RV64-NEXT: vmv1r.v v0, v12 2516; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t 2517; RV64-NEXT: ret 2518 %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64> 2519 %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs 2520 %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2521 ret <vscale x 16 x double> %v 2522} 2523 2524define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base, <vscale 
x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2525; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: 2526; RV32: # %bb.0: 2527; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2528; RV32-NEXT: csrr a2, vlenb 2529; RV32-NEXT: vzext.vf4 v16, v8 2530; RV32-NEXT: mv a3, a1 2531; RV32-NEXT: bltu a1, a2, .LBB105_2 2532; RV32-NEXT: # %bb.1: 2533; RV32-NEXT: mv a3, a2 2534; RV32-NEXT: .LBB105_2: 2535; RV32-NEXT: li a4, 0 2536; RV32-NEXT: vzext.vf4 v24, v10 2537; RV32-NEXT: vsll.vi v8, v16, 3 2538; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu 2539; RV32-NEXT: vncvt.x.x.w v16, v8 2540; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2541; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t 2542; RV32-NEXT: srli a3, a2, 3 2543; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu 2544; RV32-NEXT: sub a2, a1, a2 2545; RV32-NEXT: vslidedown.vx v0, v0, a3 2546; RV32-NEXT: bltu a1, a2, .LBB105_4 2547; RV32-NEXT: # %bb.3: 2548; RV32-NEXT: mv a4, a2 2549; RV32-NEXT: .LBB105_4: 2550; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2551; RV32-NEXT: vsll.vi v16, v24, 3 2552; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, mu 2553; RV32-NEXT: vncvt.x.x.w v24, v16 2554; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2555; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2556; RV32-NEXT: ret 2557; 2558; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: 2559; RV64: # %bb.0: 2560; RV64-NEXT: vmv1r.v v12, v0 2561; RV64-NEXT: li a3, 0 2562; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2563; RV64-NEXT: vzext.vf4 v16, v10 2564; RV64-NEXT: csrr a2, vlenb 2565; RV64-NEXT: srli a5, a2, 3 2566; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, mu 2567; RV64-NEXT: sub a4, a1, a2 2568; RV64-NEXT: vslidedown.vx v0, v0, a5 2569; RV64-NEXT: bltu a1, a4, .LBB105_2 2570; RV64-NEXT: # %bb.1: 2571; RV64-NEXT: mv a3, a4 2572; RV64-NEXT: .LBB105_2: 2573; RV64-NEXT: vsetvli a4, zero, e64, m8, ta, mu 2574; RV64-NEXT: vzext.vf4 v24, v8 2575; RV64-NEXT: vsll.vi v16, v16, 3 2576; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2577; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t 2578; RV64-NEXT: bltu a1, a2, .LBB105_4 2579; RV64-NEXT: # %bb.3: 2580; RV64-NEXT: mv a1, a2 2581; RV64-NEXT: .LBB105_4: 2582; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2583; RV64-NEXT: vsll.vi v24, v24, 3 2584; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2585; RV64-NEXT: vmv1r.v v0, v12 2586; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t 2587; RV64-NEXT: ret 2588 %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64> 2589 %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs 2590 %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2591 ret <vscale x 16 x double> %v 2592} 2593