; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)

define <vscale x 1 x i8> @mgather_nxv1i8(<vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru) {
; RV32-LABEL: mgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)

define <vscale x 2 x i8> @mgather_nxv2i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i16> @mgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @mgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf8 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf8 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)

define <vscale x 4 x i8> @mgather_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @mgather_falsemask_nxv4i8(<vscale x 4 x i8*> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0i8(<vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)

define <vscale x 8 x i8> @mgather_nxv8i8(<vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @mgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0i8(<vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)

define <vscale x 1 x i16> @mgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru) {
; RV32-LABEL: mgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)

define <vscale x 2 x i16> @mgather_nxv2i16(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @mgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf4 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0i16(<vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)

define <vscale x 4 x i16> @mgather_nxv4i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @mgather_falsemask_nxv4i16(<vscale x 4 x i16*> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0i16(<vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)

define <vscale x 8 x i16> @mgather_nxv8i16(<vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_sext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_zext_nxv8i8_nxv8i16(i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_nxv8i16(i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0i16(<vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)

define <vscale x 1 x i32> @mgather_nxv1i32(<vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru) {
; RV32-LABEL: mgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0i32(<vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)

define <vscale x 2 x i32> @mgather_nxv2i32(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @mgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vsext.vf2 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT:    vzext.vf2 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)

define <vscale x 4 x i32> @mgather_nxv4i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @mgather_falsemask_nxv4i32(<vscale x 4 x i32*> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv2r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)

define <vscale x 8 x i32> @mgather_nxv8i32(<vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32>
@mgather_baseidx_zext_nxv8i16_nxv8i32(i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) { 855; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: 856; RV32: # %bb.0: 857; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 858; RV32-NEXT: vzext.vf2 v16, v8 859; RV32-NEXT: vsll.vi v8, v16, 2 860; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 861; RV32-NEXT: vmv.v.v v8, v12 862; RV32-NEXT: ret 863; 864; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32: 865; RV64: # %bb.0: 866; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 867; RV64-NEXT: vzext.vf4 v16, v8 868; RV64-NEXT: vsll.vi v16, v16, 2 869; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 870; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 871; RV64-NEXT: vmv.v.v v8, v12 872; RV64-NEXT: ret 873 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 874 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs 875 %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) 876 ret <vscale x 8 x i32> %v 877} 878 879define <vscale x 8 x i32> @mgather_baseidx_nxv8i32(i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) { 880; RV32-LABEL: mgather_baseidx_nxv8i32: 881; RV32: # %bb.0: 882; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 883; RV32-NEXT: vsll.vi v8, v8, 2 884; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 885; RV32-NEXT: vmv.v.v v8, v12 886; RV32-NEXT: ret 887; 888; RV64-LABEL: mgather_baseidx_nxv8i32: 889; RV64: # %bb.0: 890; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 891; RV64-NEXT: vsext.vf2 v16, v8 892; RV64-NEXT: vsll.vi v16, v16, 2 893; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 894; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 895; RV64-NEXT: vmv.v.v v8, v12 896; RV64-NEXT: ret 897 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs 898 %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32(<vscale 
x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) 899 ret <vscale x 8 x i32> %v 900} 901 902declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>) 903 904define <vscale x 1 x i64> @mgather_nxv1i64(<vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru) { 905; RV32-LABEL: mgather_nxv1i64: 906; RV32: # %bb.0: 907; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu 908; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 909; RV32-NEXT: vmv.v.v v8, v9 910; RV32-NEXT: ret 911; 912; RV64-LABEL: mgather_nxv1i64: 913; RV64: # %bb.0: 914; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu 915; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t 916; RV64-NEXT: vmv.v.v v8, v9 917; RV64-NEXT: ret 918 %v = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0i64(<vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru) 919 ret <vscale x 1 x i64> %v 920} 921 922declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>) 923 924define <vscale x 2 x i64> @mgather_nxv2i64(<vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru) { 925; RV32-LABEL: mgather_nxv2i64: 926; RV32: # %bb.0: 927; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu 928; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 929; RV32-NEXT: vmv.v.v v8, v10 930; RV32-NEXT: ret 931; 932; RV64-LABEL: mgather_nxv2i64: 933; RV64: # %bb.0: 934; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu 935; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t 936; RV64-NEXT: vmv.v.v v8, v10 937; RV64-NEXT: ret 938 %v = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru) 939 ret <vscale x 2 x i64> %v 940} 941 942declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>) 943 
944define <vscale x 4 x i64> @mgather_nxv4i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru) { 945; RV32-LABEL: mgather_nxv4i64: 946; RV32: # %bb.0: 947; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 948; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 949; RV32-NEXT: vmv.v.v v8, v12 950; RV32-NEXT: ret 951; 952; RV64-LABEL: mgather_nxv4i64: 953; RV64: # %bb.0: 954; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 955; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t 956; RV64-NEXT: vmv.v.v v8, v12 957; RV64-NEXT: ret 958 %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru) 959 ret <vscale x 4 x i64> %v 960} 961 962define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i64> %passthru) { 963; RV32-LABEL: mgather_truemask_nxv4i64: 964; RV32: # %bb.0: 965; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 966; RV32-NEXT: vluxei32.v v12, (zero), v8 967; RV32-NEXT: vmv.v.v v8, v12 968; RV32-NEXT: ret 969; 970; RV64-LABEL: mgather_truemask_nxv4i64: 971; RV64: # %bb.0: 972; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 973; RV64-NEXT: vluxei64.v v8, (zero), v8 974; RV64-NEXT: ret 975 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 976 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 977 %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x i64> %passthru) 978 ret <vscale x 4 x i64> %v 979} 980 981define <vscale x 4 x i64> @mgather_falsemask_nxv4i64(<vscale x 4 x i64*> %ptrs, <vscale x 4 x i64> %passthru) { 982; CHECK-LABEL: mgather_falsemask_nxv4i64: 983; CHECK: # %bb.0: 984; CHECK-NEXT: vmv4r.v v8, v12 985; CHECK-NEXT: ret 986 %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0i64(<vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, 
<vscale x 4 x i64> %passthru) 987 ret <vscale x 4 x i64> %v 988} 989 990declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>) 991 992define <vscale x 8 x i64> @mgather_nxv8i64(<vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 993; RV32-LABEL: mgather_nxv8i64: 994; RV32: # %bb.0: 995; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu 996; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 997; RV32-NEXT: vmv.v.v v8, v16 998; RV32-NEXT: ret 999; 1000; RV64-LABEL: mgather_nxv8i64: 1001; RV64: # %bb.0: 1002; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1003; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t 1004; RV64-NEXT: vmv.v.v v8, v16 1005; RV64-NEXT: ret 1006 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1007 ret <vscale x 8 x i64> %v 1008} 1009 1010define <vscale x 8 x i64> @mgather_baseidx_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1011; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i64: 1012; RV32: # %bb.0: 1013; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1014; RV32-NEXT: vsext.vf4 v12, v8 1015; RV32-NEXT: vsll.vi v8, v12, 3 1016; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1017; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t 1018; RV32-NEXT: vmv.v.v v8, v16 1019; RV32-NEXT: ret 1020; 1021; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64: 1022; RV64: # %bb.0: 1023; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1024; RV64-NEXT: vsext.vf8 v24, v8 1025; RV64-NEXT: vsll.vi v8, v24, 3 1026; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1027; RV64-NEXT: vmv.v.v v8, v16 1028; RV64-NEXT: ret 1029 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs 1030 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1031 ret 
<vscale x 8 x i64> %v 1032} 1033 1034define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1035; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: 1036; RV32: # %bb.0: 1037; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1038; RV32-NEXT: vsext.vf8 v24, v8 1039; RV32-NEXT: vsll.vi v8, v24, 3 1040; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1041; RV32-NEXT: vncvt.x.x.w v24, v8 1042; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1043; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1044; RV32-NEXT: vmv.v.v v8, v16 1045; RV32-NEXT: ret 1046; 1047; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: 1048; RV64: # %bb.0: 1049; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1050; RV64-NEXT: vsext.vf8 v24, v8 1051; RV64-NEXT: vsll.vi v8, v24, 3 1052; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1053; RV64-NEXT: vmv.v.v v8, v16 1054; RV64-NEXT: ret 1055 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1056 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1057 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1058 ret <vscale x 8 x i64> %v 1059} 1060 1061define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1062; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: 1063; RV32: # %bb.0: 1064; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1065; RV32-NEXT: vzext.vf8 v24, v8 1066; RV32-NEXT: vsll.vi v8, v24, 3 1067; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1068; RV32-NEXT: vncvt.x.x.w v24, v8 1069; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1070; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1071; RV32-NEXT: vmv.v.v v8, v16 1072; RV32-NEXT: ret 1073; 1074; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64: 1075; RV64: # %bb.0: 1076; RV64-NEXT: vsetvli a1, 
zero, e64, m8, ta, mu 1077; RV64-NEXT: vzext.vf8 v24, v8 1078; RV64-NEXT: vsll.vi v8, v24, 3 1079; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1080; RV64-NEXT: vmv.v.v v8, v16 1081; RV64-NEXT: ret 1082 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1083 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1084 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1085 ret <vscale x 8 x i64> %v 1086} 1087 1088define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1089; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64: 1090; RV32: # %bb.0: 1091; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1092; RV32-NEXT: vsext.vf2 v12, v8 1093; RV32-NEXT: vsll.vi v8, v12, 3 1094; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1095; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t 1096; RV32-NEXT: vmv.v.v v8, v16 1097; RV32-NEXT: ret 1098; 1099; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64: 1100; RV64: # %bb.0: 1101; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1102; RV64-NEXT: vsext.vf4 v24, v8 1103; RV64-NEXT: vsll.vi v8, v24, 3 1104; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1105; RV64-NEXT: vmv.v.v v8, v16 1106; RV64-NEXT: ret 1107 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs 1108 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1109 ret <vscale x 8 x i64> %v 1110} 1111 1112define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1113; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: 1114; RV32: # %bb.0: 1115; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1116; RV32-NEXT: vsext.vf4 v24, v8 1117; RV32-NEXT: vsll.vi v8, v24, 3 1118; RV32-NEXT: 
vsetvli zero, zero, e32, m4, ta, mu 1119; RV32-NEXT: vncvt.x.x.w v24, v8 1120; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1121; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1122; RV32-NEXT: vmv.v.v v8, v16 1123; RV32-NEXT: ret 1124; 1125; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: 1126; RV64: # %bb.0: 1127; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1128; RV64-NEXT: vsext.vf4 v24, v8 1129; RV64-NEXT: vsll.vi v8, v24, 3 1130; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1131; RV64-NEXT: vmv.v.v v8, v16 1132; RV64-NEXT: ret 1133 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1134 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1135 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1136 ret <vscale x 8 x i64> %v 1137} 1138 1139define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1140; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: 1141; RV32: # %bb.0: 1142; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1143; RV32-NEXT: vzext.vf4 v24, v8 1144; RV32-NEXT: vsll.vi v8, v24, 3 1145; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1146; RV32-NEXT: vncvt.x.x.w v24, v8 1147; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1148; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1149; RV32-NEXT: vmv.v.v v8, v16 1150; RV32-NEXT: ret 1151; 1152; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64: 1153; RV64: # %bb.0: 1154; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1155; RV64-NEXT: vzext.vf4 v24, v8 1156; RV64-NEXT: vsll.vi v8, v24, 3 1157; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1158; RV64-NEXT: vmv.v.v v8, v16 1159; RV64-NEXT: ret 1160 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1161 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1162 %v = call <vscale x 8 x i64> 
@llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1163 ret <vscale x 8 x i64> %v 1164} 1165 1166define <vscale x 8 x i64> @mgather_baseidx_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1167; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8i64: 1168; RV32: # %bb.0: 1169; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1170; RV32-NEXT: vsll.vi v8, v8, 3 1171; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1172; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t 1173; RV32-NEXT: vmv.v.v v8, v16 1174; RV32-NEXT: ret 1175; 1176; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64: 1177; RV64: # %bb.0: 1178; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1179; RV64-NEXT: vsext.vf2 v24, v8 1180; RV64-NEXT: vsll.vi v8, v24, 3 1181; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1182; RV64-NEXT: vmv.v.v v8, v16 1183; RV64-NEXT: ret 1184 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs 1185 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1186 ret <vscale x 8 x i64> %v 1187} 1188 1189define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1190; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: 1191; RV32: # %bb.0: 1192; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1193; RV32-NEXT: vsext.vf2 v24, v8 1194; RV32-NEXT: vsll.vi v8, v24, 3 1195; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1196; RV32-NEXT: vncvt.x.x.w v24, v8 1197; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1198; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1199; RV32-NEXT: vmv.v.v v8, v16 1200; RV32-NEXT: ret 1201; 1202; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64: 1203; RV64: # %bb.0: 1204; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1205; RV64-NEXT: vsext.vf2 v24, v8 1206; 
RV64-NEXT: vsll.vi v8, v24, 3 1207; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1208; RV64-NEXT: vmv.v.v v8, v16 1209; RV64-NEXT: ret 1210 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1211 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1212 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1213 ret <vscale x 8 x i64> %v 1214} 1215 1216define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1217; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: 1218; RV32: # %bb.0: 1219; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1220; RV32-NEXT: vzext.vf2 v24, v8 1221; RV32-NEXT: vsll.vi v8, v24, 3 1222; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1223; RV32-NEXT: vncvt.x.x.w v24, v8 1224; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1225; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1226; RV32-NEXT: vmv.v.v v8, v16 1227; RV32-NEXT: ret 1228; 1229; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64: 1230; RV64: # %bb.0: 1231; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1232; RV64-NEXT: vzext.vf2 v24, v8 1233; RV64-NEXT: vsll.vi v8, v24, 3 1234; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1235; RV64-NEXT: vmv.v.v v8, v16 1236; RV64-NEXT: ret 1237 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1238 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1239 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1240 ret <vscale x 8 x i64> %v 1241} 1242 1243define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) { 1244; RV32-LABEL: mgather_baseidx_nxv8i64: 1245; RV32: # %bb.0: 1246; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 
1247; RV32-NEXT: vsll.vi v8, v8, 3 1248; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1249; RV32-NEXT: vncvt.x.x.w v24, v8 1250; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1251; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1252; RV32-NEXT: vmv.v.v v8, v16 1253; RV32-NEXT: ret 1254; 1255; RV64-LABEL: mgather_baseidx_nxv8i64: 1256; RV64: # %bb.0: 1257; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1258; RV64-NEXT: vsll.vi v8, v8, 3 1259; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1260; RV64-NEXT: vmv.v.v v8, v16 1261; RV64-NEXT: ret 1262 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs 1263 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0i64(<vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) 1264 ret <vscale x 8 x i64> %v 1265} 1266 1267declare <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0f64(<vscale x 16 x i64*>, i32, <vscale x 16 x i1>, <vscale x 16 x i64>) 1268 1269declare <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64 %idx) 1270declare <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*>, <vscale x 8 x i64*>, i64 %idx) 1271 1272define void @mgather_nxv16i64(<vscale x 8 x i64*> %ptrs0, <vscale x 8 x i64*> %ptrs1, <vscale x 16 x i1> %m, <vscale x 8 x i64> %passthru0, <vscale x 8 x i64> %passthru1, <vscale x 16 x i64>* %out) { 1273; RV32-LABEL: mgather_nxv16i64: 1274; RV32: # %bb.0: 1275; RV32-NEXT: vl8re64.v v24, (a0) 1276; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1277; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 1278; RV32-NEXT: csrr a0, vlenb 1279; RV32-NEXT: srli a2, a0, 3 1280; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, mu 1281; RV32-NEXT: vslidedown.vx v0, v0, a2 1282; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1283; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t 1284; RV32-NEXT: slli a0, a0, 3 1285; RV32-NEXT: add a0, a1, a0 1286; RV32-NEXT: vs8r.v v24, (a0) 1287; RV32-NEXT: vs8r.v v16, 
(a1) 1288; RV32-NEXT: ret 1289; 1290; RV64-LABEL: mgather_nxv16i64: 1291; RV64: # %bb.0: 1292; RV64-NEXT: addi sp, sp, -16 1293; RV64-NEXT: .cfi_def_cfa_offset 16 1294; RV64-NEXT: csrr a3, vlenb 1295; RV64-NEXT: slli a3, a3, 3 1296; RV64-NEXT: sub sp, sp, a3 1297; RV64-NEXT: vl8re64.v v24, (a0) 1298; RV64-NEXT: addi a0, sp, 16 1299; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1300; RV64-NEXT: vmv8r.v v16, v8 1301; RV64-NEXT: vl8re64.v v8, (a1) 1302; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1303; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t 1304; RV64-NEXT: csrr a0, vlenb 1305; RV64-NEXT: srli a1, a0, 3 1306; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, mu 1307; RV64-NEXT: vslidedown.vx v0, v0, a1 1308; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1309; RV64-NEXT: addi a1, sp, 16 1310; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload 1311; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t 1312; RV64-NEXT: slli a0, a0, 3 1313; RV64-NEXT: add a0, a2, a0 1314; RV64-NEXT: vs8r.v v8, (a0) 1315; RV64-NEXT: vs8r.v v24, (a2) 1316; RV64-NEXT: csrr a0, vlenb 1317; RV64-NEXT: slli a0, a0, 3 1318; RV64-NEXT: add sp, sp, a0 1319; RV64-NEXT: addi sp, sp, 16 1320; RV64-NEXT: ret 1321 %p0 = call <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> undef, <vscale x 8 x i64*> %ptrs0, i64 0) 1322 %p1 = call <vscale x 16 x i64*> @llvm.vector.insert.nxv8p0i64.nxv16p0i64(<vscale x 16 x i64*> %p0, <vscale x 8 x i64*> %ptrs1, i64 8) 1323 1324 %pt0 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %passthru0, i64 0) 1325 %pt1 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %pt0, <vscale x 8 x i64> %passthru1, i64 8) 1326 1327 %v = call <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0f64(<vscale x 16 x i64*> %p1, i32 8, <vscale x 16 x i1> %m, <vscale x 16 x i64> %pt1) 1328 store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out 1329 ret void 
1330} 1331 1332 1333declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*>, i32, <vscale x 1 x i1>, <vscale x 1 x half>) 1334 1335define <vscale x 1 x half> @mgather_nxv1f16(<vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru) { 1336; RV32-LABEL: mgather_nxv1f16: 1337; RV32: # %bb.0: 1338; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1339; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1340; RV32-NEXT: vmv1r.v v8, v9 1341; RV32-NEXT: ret 1342; 1343; RV64-LABEL: mgather_nxv1f16: 1344; RV64: # %bb.0: 1345; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1346; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t 1347; RV64-NEXT: vmv1r.v v8, v9 1348; RV64-NEXT: ret 1349 %v = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0f16(<vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru) 1350 ret <vscale x 1 x half> %v 1351} 1352 1353declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*>, i32, <vscale x 2 x i1>, <vscale x 2 x half>) 1354 1355define <vscale x 2 x half> @mgather_nxv2f16(<vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru) { 1356; RV32-LABEL: mgather_nxv2f16: 1357; RV32: # %bb.0: 1358; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1359; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1360; RV32-NEXT: vmv1r.v v8, v9 1361; RV32-NEXT: ret 1362; 1363; RV64-LABEL: mgather_nxv2f16: 1364; RV64: # %bb.0: 1365; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1366; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t 1367; RV64-NEXT: vmv1r.v v8, v10 1368; RV64-NEXT: ret 1369 %v = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0f16(<vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru) 1370 ret <vscale x 2 x half> %v 1371} 1372 1373declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*>, i32, <vscale x 4 x i1>, <vscale x 4 x half>) 1374 1375define <vscale x 4 x half> 
@mgather_nxv4f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru) { 1376; RV32-LABEL: mgather_nxv4f16: 1377; RV32: # %bb.0: 1378; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1379; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 1380; RV32-NEXT: vmv.v.v v8, v10 1381; RV32-NEXT: ret 1382; 1383; RV64-LABEL: mgather_nxv4f16: 1384; RV64: # %bb.0: 1385; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1386; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t 1387; RV64-NEXT: vmv.v.v v8, v12 1388; RV64-NEXT: ret 1389 %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru) 1390 ret <vscale x 4 x half> %v 1391} 1392 1393define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x half> %passthru) { 1394; RV32-LABEL: mgather_truemask_nxv4f16: 1395; RV32: # %bb.0: 1396; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1397; RV32-NEXT: vluxei32.v v10, (zero), v8 1398; RV32-NEXT: vmv.v.v v8, v10 1399; RV32-NEXT: ret 1400; 1401; RV64-LABEL: mgather_truemask_nxv4f16: 1402; RV64: # %bb.0: 1403; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1404; RV64-NEXT: vluxei64.v v12, (zero), v8 1405; RV64-NEXT: vmv.v.v v8, v12 1406; RV64-NEXT: ret 1407 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1408 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1409 %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x half> %passthru) 1410 ret <vscale x 4 x half> %v 1411} 1412 1413define <vscale x 4 x half> @mgather_falsemask_nxv4f16(<vscale x 4 x half*> %ptrs, <vscale x 4 x half> %passthru) { 1414; RV32-LABEL: mgather_falsemask_nxv4f16: 1415; RV32: # %bb.0: 1416; RV32-NEXT: vmv1r.v v8, v10 1417; RV32-NEXT: ret 1418; 1419; RV64-LABEL: mgather_falsemask_nxv4f16: 1420; RV64: # %bb.0: 1421; 
RV64-NEXT: vmv1r.v v8, v12 1422; RV64-NEXT: ret 1423 %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0f16(<vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x half> %passthru) 1424 ret <vscale x 4 x half> %v 1425} 1426 1427declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*>, i32, <vscale x 8 x i1>, <vscale x 8 x half>) 1428 1429define <vscale x 8 x half> @mgather_nxv8f16(<vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) { 1430; RV32-LABEL: mgather_nxv8f16: 1431; RV32: # %bb.0: 1432; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1433; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 1434; RV32-NEXT: vmv.v.v v8, v12 1435; RV32-NEXT: ret 1436; 1437; RV64-LABEL: mgather_nxv8f16: 1438; RV64: # %bb.0: 1439; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1440; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t 1441; RV64-NEXT: vmv.v.v v8, v16 1442; RV64-NEXT: ret 1443 %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) 1444 ret <vscale x 8 x half> %v 1445} 1446 1447define <vscale x 8 x half> @mgather_baseidx_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) { 1448; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f16: 1449; RV32: # %bb.0: 1450; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1451; RV32-NEXT: vsext.vf4 v12, v8 1452; RV32-NEXT: vadd.vv v12, v12, v12 1453; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1454; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t 1455; RV32-NEXT: vmv.v.v v8, v10 1456; RV32-NEXT: ret 1457; 1458; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f16: 1459; RV64: # %bb.0: 1460; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1461; RV64-NEXT: vsext.vf8 v16, v8 1462; RV64-NEXT: vadd.vv v16, v16, v16 1463; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1464; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t 1465; RV64-NEXT: 
vmv.v.v v8, v10 1466; RV64-NEXT: ret 1467 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs 1468 %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) 1469 ret <vscale x 8 x half> %v 1470} 1471 1472define <vscale x 8 x half> @mgather_baseidx_sext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) { 1473; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16: 1474; RV32: # %bb.0: 1475; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1476; RV32-NEXT: vsext.vf4 v12, v8 1477; RV32-NEXT: vadd.vv v12, v12, v12 1478; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1479; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t 1480; RV32-NEXT: vmv.v.v v8, v10 1481; RV32-NEXT: ret 1482; 1483; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16: 1484; RV64: # %bb.0: 1485; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1486; RV64-NEXT: vsext.vf8 v16, v8 1487; RV64-NEXT: vadd.vv v16, v16, v16 1488; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1489; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t 1490; RV64-NEXT: vmv.v.v v8, v10 1491; RV64-NEXT: ret 1492 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1493 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1494 %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) 1495 ret <vscale x 8 x half> %v 1496} 1497 1498define <vscale x 8 x half> @mgather_baseidx_zext_nxv8i8_nxv8f16(half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) { 1499; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: 1500; RV32: # %bb.0: 1501; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1502; RV32-NEXT: vzext.vf4 v12, v8 1503; RV32-NEXT: vadd.vv v12, v12, v12 1504; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1505; RV32-NEXT: vluxei32.v v10, 
(a0), v12, v0.t 1506; RV32-NEXT: vmv.v.v v8, v10 1507; RV32-NEXT: ret 1508; 1509; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16: 1510; RV64: # %bb.0: 1511; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1512; RV64-NEXT: vzext.vf8 v16, v8 1513; RV64-NEXT: vadd.vv v16, v16, v16 1514; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1515; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t 1516; RV64-NEXT: vmv.v.v v8, v10 1517; RV64-NEXT: ret 1518 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1519 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1520 %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) 1521 ret <vscale x 8 x half> %v 1522} 1523 1524define <vscale x 8 x half> @mgather_baseidx_nxv8f16(half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) { 1525; RV32-LABEL: mgather_baseidx_nxv8f16: 1526; RV32: # %bb.0: 1527; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1528; RV32-NEXT: vsext.vf2 v12, v8 1529; RV32-NEXT: vadd.vv v12, v12, v12 1530; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1531; RV32-NEXT: vluxei32.v v10, (a0), v12, v0.t 1532; RV32-NEXT: vmv.v.v v8, v10 1533; RV32-NEXT: ret 1534; 1535; RV64-LABEL: mgather_baseidx_nxv8f16: 1536; RV64: # %bb.0: 1537; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1538; RV64-NEXT: vsext.vf4 v16, v8 1539; RV64-NEXT: vadd.vv v16, v16, v16 1540; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1541; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t 1542; RV64-NEXT: vmv.v.v v8, v10 1543; RV64-NEXT: ret 1544 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs 1545 %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0f16(<vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) 1546 ret <vscale x 8 x half> %v 1547} 1548 1549declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x 
float*>, i32, <vscale x 1 x i1>, <vscale x 1 x float>) 1550 1551define <vscale x 1 x float> @mgather_nxv1f32(<vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru) { 1552; RV32-LABEL: mgather_nxv1f32: 1553; RV32: # %bb.0: 1554; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1555; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1556; RV32-NEXT: vmv1r.v v8, v9 1557; RV32-NEXT: ret 1558; 1559; RV64-LABEL: mgather_nxv1f32: 1560; RV64: # %bb.0: 1561; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1562; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t 1563; RV64-NEXT: vmv1r.v v8, v9 1564; RV64-NEXT: ret 1565 %v = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0f32(<vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru) 1566 ret <vscale x 1 x float> %v 1567} 1568 1569declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>) 1570 1571define <vscale x 2 x float> @mgather_nxv2f32(<vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru) { 1572; RV32-LABEL: mgather_nxv2f32: 1573; RV32: # %bb.0: 1574; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1575; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1576; RV32-NEXT: vmv.v.v v8, v9 1577; RV32-NEXT: ret 1578; 1579; RV64-LABEL: mgather_nxv2f32: 1580; RV64: # %bb.0: 1581; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1582; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t 1583; RV64-NEXT: vmv.v.v v8, v10 1584; RV64-NEXT: ret 1585 %v = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru) 1586 ret <vscale x 2 x float> %v 1587} 1588 1589declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>) 1590 1591define <vscale x 4 x float> @mgather_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x 
float> %passthru) { 1592; RV32-LABEL: mgather_nxv4f32: 1593; RV32: # %bb.0: 1594; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1595; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 1596; RV32-NEXT: vmv.v.v v8, v10 1597; RV32-NEXT: ret 1598; 1599; RV64-LABEL: mgather_nxv4f32: 1600; RV64: # %bb.0: 1601; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1602; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t 1603; RV64-NEXT: vmv.v.v v8, v12 1604; RV64-NEXT: ret 1605 %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru) 1606 ret <vscale x 4 x float> %v 1607} 1608 1609define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x float> %passthru) { 1610; RV32-LABEL: mgather_truemask_nxv4f32: 1611; RV32: # %bb.0: 1612; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1613; RV32-NEXT: vluxei32.v v8, (zero), v8 1614; RV32-NEXT: ret 1615; 1616; RV64-LABEL: mgather_truemask_nxv4f32: 1617; RV64: # %bb.0: 1618; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1619; RV64-NEXT: vluxei64.v v12, (zero), v8 1620; RV64-NEXT: vmv.v.v v8, v12 1621; RV64-NEXT: ret 1622 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1623 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1624 %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x float> %passthru) 1625 ret <vscale x 4 x float> %v 1626} 1627 1628define <vscale x 4 x float> @mgather_falsemask_nxv4f32(<vscale x 4 x float*> %ptrs, <vscale x 4 x float> %passthru) { 1629; RV32-LABEL: mgather_falsemask_nxv4f32: 1630; RV32: # %bb.0: 1631; RV32-NEXT: vmv2r.v v8, v10 1632; RV32-NEXT: ret 1633; 1634; RV64-LABEL: mgather_falsemask_nxv4f32: 1635; RV64: # %bb.0: 1636; RV64-NEXT: vmv2r.v v8, v12 1637; RV64-NEXT: ret 1638 %v = call <vscale x 4 x float> 
@llvm.masked.gather.nxv4f32.nxv4p0f32(<vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %passthru) 1639 ret <vscale x 4 x float> %v 1640} 1641 1642declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*>, i32, <vscale x 8 x i1>, <vscale x 8 x float>) 1643 1644define <vscale x 8 x float> @mgather_nxv8f32(<vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1645; RV32-LABEL: mgather_nxv8f32: 1646; RV32: # %bb.0: 1647; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1648; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 1649; RV32-NEXT: vmv.v.v v8, v12 1650; RV32-NEXT: ret 1651; 1652; RV64-LABEL: mgather_nxv8f32: 1653; RV64: # %bb.0: 1654; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1655; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t 1656; RV64-NEXT: vmv.v.v v8, v16 1657; RV64-NEXT: ret 1658 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1659 ret <vscale x 8 x float> %v 1660} 1661 1662define <vscale x 8 x float> @mgather_baseidx_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1663; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f32: 1664; RV32: # %bb.0: 1665; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1666; RV32-NEXT: vsext.vf4 v16, v8 1667; RV32-NEXT: vsll.vi v8, v16, 2 1668; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1669; RV32-NEXT: vmv.v.v v8, v12 1670; RV32-NEXT: ret 1671; 1672; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f32: 1673; RV64: # %bb.0: 1674; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1675; RV64-NEXT: vsext.vf8 v16, v8 1676; RV64-NEXT: vsll.vi v16, v16, 2 1677; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1678; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1679; RV64-NEXT: vmv.v.v v8, v12 1680; RV64-NEXT: ret 1681 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs 1682 %v 
= call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1683 ret <vscale x 8 x float> %v 1684} 1685 1686define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1687; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32: 1688; RV32: # %bb.0: 1689; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1690; RV32-NEXT: vsext.vf4 v16, v8 1691; RV32-NEXT: vsll.vi v8, v16, 2 1692; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1693; RV32-NEXT: vmv.v.v v8, v12 1694; RV32-NEXT: ret 1695; 1696; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32: 1697; RV64: # %bb.0: 1698; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1699; RV64-NEXT: vsext.vf8 v16, v8 1700; RV64-NEXT: vsll.vi v16, v16, 2 1701; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1702; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1703; RV64-NEXT: vmv.v.v v8, v12 1704; RV64-NEXT: ret 1705 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1706 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1707 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1708 ret <vscale x 8 x float> %v 1709} 1710 1711define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1712; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: 1713; RV32: # %bb.0: 1714; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1715; RV32-NEXT: vzext.vf4 v16, v8 1716; RV32-NEXT: vsll.vi v8, v16, 2 1717; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1718; RV32-NEXT: vmv.v.v v8, v12 1719; RV32-NEXT: ret 1720; 1721; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32: 1722; RV64: # %bb.0: 1723; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1724; RV64-NEXT: 
vzext.vf8 v16, v8 1725; RV64-NEXT: vsll.vi v16, v16, 2 1726; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1727; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1728; RV64-NEXT: vmv.v.v v8, v12 1729; RV64-NEXT: ret 1730 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1731 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1732 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1733 ret <vscale x 8 x float> %v 1734} 1735 1736define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1737; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32: 1738; RV32: # %bb.0: 1739; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1740; RV32-NEXT: vsext.vf2 v16, v8 1741; RV32-NEXT: vsll.vi v8, v16, 2 1742; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1743; RV32-NEXT: vmv.v.v v8, v12 1744; RV32-NEXT: ret 1745; 1746; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f32: 1747; RV64: # %bb.0: 1748; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1749; RV64-NEXT: vsext.vf4 v16, v8 1750; RV64-NEXT: vsll.vi v16, v16, 2 1751; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1752; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1753; RV64-NEXT: vmv.v.v v8, v12 1754; RV64-NEXT: ret 1755 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs 1756 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1757 ret <vscale x 8 x float> %v 1758} 1759 1760define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1761; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32: 1762; RV32: # %bb.0: 1763; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1764; RV32-NEXT: vsext.vf2 v16, v8 
1765; RV32-NEXT: vsll.vi v8, v16, 2 1766; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1767; RV32-NEXT: vmv.v.v v8, v12 1768; RV32-NEXT: ret 1769; 1770; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32: 1771; RV64: # %bb.0: 1772; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1773; RV64-NEXT: vsext.vf4 v16, v8 1774; RV64-NEXT: vsll.vi v16, v16, 2 1775; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1776; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1777; RV64-NEXT: vmv.v.v v8, v12 1778; RV64-NEXT: ret 1779 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1780 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1781 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1782 ret <vscale x 8 x float> %v 1783} 1784 1785define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1786; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: 1787; RV32: # %bb.0: 1788; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1789; RV32-NEXT: vzext.vf2 v16, v8 1790; RV32-NEXT: vsll.vi v8, v16, 2 1791; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1792; RV32-NEXT: vmv.v.v v8, v12 1793; RV32-NEXT: ret 1794; 1795; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32: 1796; RV64: # %bb.0: 1797; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1798; RV64-NEXT: vzext.vf4 v16, v8 1799; RV64-NEXT: vsll.vi v16, v16, 2 1800; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1801; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1802; RV64-NEXT: vmv.v.v v8, v12 1803; RV64-NEXT: ret 1804 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1805 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1806 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 
1807 ret <vscale x 8 x float> %v 1808} 1809 1810define <vscale x 8 x float> @mgather_baseidx_nxv8f32(float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) { 1811; RV32-LABEL: mgather_baseidx_nxv8f32: 1812; RV32: # %bb.0: 1813; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1814; RV32-NEXT: vsll.vi v8, v8, 2 1815; RV32-NEXT: vluxei32.v v12, (a0), v8, v0.t 1816; RV32-NEXT: vmv.v.v v8, v12 1817; RV32-NEXT: ret 1818; 1819; RV64-LABEL: mgather_baseidx_nxv8f32: 1820; RV64: # %bb.0: 1821; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1822; RV64-NEXT: vsext.vf2 v16, v8 1823; RV64-NEXT: vsll.vi v16, v16, 2 1824; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1825; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t 1826; RV64-NEXT: vmv.v.v v8, v12 1827; RV64-NEXT: ret 1828 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs 1829 %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0f32(<vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) 1830 ret <vscale x 8 x float> %v 1831} 1832 1833declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*>, i32, <vscale x 1 x i1>, <vscale x 1 x double>) 1834 1835define <vscale x 1 x double> @mgather_nxv1f64(<vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) { 1836; RV32-LABEL: mgather_nxv1f64: 1837; RV32: # %bb.0: 1838; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1839; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t 1840; RV32-NEXT: vmv.v.v v8, v9 1841; RV32-NEXT: ret 1842; 1843; RV64-LABEL: mgather_nxv1f64: 1844; RV64: # %bb.0: 1845; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1846; RV64-NEXT: vluxei64.v v9, (zero), v8, v0.t 1847; RV64-NEXT: vmv.v.v v8, v9 1848; RV64-NEXT: ret 1849 %v = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0f64(<vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) 1850 ret <vscale x 1 x double> %v 
1851} 1852 1853declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*>, i32, <vscale x 2 x i1>, <vscale x 2 x double>) 1854 1855define <vscale x 2 x double> @mgather_nxv2f64(<vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) { 1856; RV32-LABEL: mgather_nxv2f64: 1857; RV32: # %bb.0: 1858; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1859; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t 1860; RV32-NEXT: vmv.v.v v8, v10 1861; RV32-NEXT: ret 1862; 1863; RV64-LABEL: mgather_nxv2f64: 1864; RV64: # %bb.0: 1865; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1866; RV64-NEXT: vluxei64.v v10, (zero), v8, v0.t 1867; RV64-NEXT: vmv.v.v v8, v10 1868; RV64-NEXT: ret 1869 %v = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) 1870 ret <vscale x 2 x double> %v 1871} 1872 1873declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*>, i32, <vscale x 4 x i1>, <vscale x 4 x double>) 1874 1875define <vscale x 4 x double> @mgather_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) { 1876; RV32-LABEL: mgather_nxv4f64: 1877; RV32: # %bb.0: 1878; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1879; RV32-NEXT: vluxei32.v v12, (zero), v8, v0.t 1880; RV32-NEXT: vmv.v.v v8, v12 1881; RV32-NEXT: ret 1882; 1883; RV64-LABEL: mgather_nxv4f64: 1884; RV64: # %bb.0: 1885; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1886; RV64-NEXT: vluxei64.v v12, (zero), v8, v0.t 1887; RV64-NEXT: vmv.v.v v8, v12 1888; RV64-NEXT: ret 1889 %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) 1890 ret <vscale x 4 x double> %v 1891} 1892 1893define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x double> %passthru) { 1894; 
RV32-LABEL: mgather_truemask_nxv4f64: 1895; RV32: # %bb.0: 1896; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1897; RV32-NEXT: vluxei32.v v12, (zero), v8 1898; RV32-NEXT: vmv.v.v v8, v12 1899; RV32-NEXT: ret 1900; 1901; RV64-LABEL: mgather_truemask_nxv4f64: 1902; RV64: # %bb.0: 1903; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1904; RV64-NEXT: vluxei64.v v8, (zero), v8 1905; RV64-NEXT: ret 1906 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1907 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1908 %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x double> %passthru) 1909 ret <vscale x 4 x double> %v 1910} 1911 1912define <vscale x 4 x double> @mgather_falsemask_nxv4f64(<vscale x 4 x double*> %ptrs, <vscale x 4 x double> %passthru) { 1913; CHECK-LABEL: mgather_falsemask_nxv4f64: 1914; CHECK: # %bb.0: 1915; CHECK-NEXT: vmv4r.v v8, v12 1916; CHECK-NEXT: ret 1917 %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0f64(<vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x double> %passthru) 1918 ret <vscale x 4 x double> %v 1919} 1920 1921declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*>, i32, <vscale x 8 x i1>, <vscale x 8 x double>) 1922 1923define <vscale x 8 x double> @mgather_nxv8f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) { 1924; RV32-LABEL: mgather_nxv8f64: 1925; RV32: # %bb.0: 1926; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1927; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t 1928; RV32-NEXT: vmv.v.v v8, v16 1929; RV32-NEXT: ret 1930; 1931; RV64-LABEL: mgather_nxv8f64: 1932; RV64: # %bb.0: 1933; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1934; RV64-NEXT: vluxei64.v v16, (zero), v8, v0.t 1935; RV64-NEXT: vmv.v.v v8, v16 1936; RV64-NEXT: ret 1937 %v = call 
<vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) 1938 ret <vscale x 8 x double> %v 1939} 1940 1941define <vscale x 8 x double> @mgather_baseidx_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) { 1942; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f64: 1943; RV32: # %bb.0: 1944; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1945; RV32-NEXT: vsext.vf4 v12, v8 1946; RV32-NEXT: vsll.vi v8, v12, 3 1947; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1948; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t 1949; RV32-NEXT: vmv.v.v v8, v16 1950; RV32-NEXT: ret 1951; 1952; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64: 1953; RV64: # %bb.0: 1954; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1955; RV64-NEXT: vsext.vf8 v24, v8 1956; RV64-NEXT: vsll.vi v8, v24, 3 1957; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1958; RV64-NEXT: vmv.v.v v8, v16 1959; RV64-NEXT: ret 1960 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs 1961 %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) 1962 ret <vscale x 8 x double> %v 1963} 1964 1965define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) { 1966; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64: 1967; RV32: # %bb.0: 1968; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1969; RV32-NEXT: vsext.vf8 v24, v8 1970; RV32-NEXT: vsll.vi v8, v24, 3 1971; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1972; RV32-NEXT: vncvt.x.x.w v24, v8 1973; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1974; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 1975; RV32-NEXT: vmv.v.v v8, v16 1976; RV32-NEXT: ret 1977; 1978; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64: 1979; RV64: # %bb.0: 
1980; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1981; RV64-NEXT: vsext.vf8 v24, v8 1982; RV64-NEXT: vsll.vi v8, v24, 3 1983; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 1984; RV64-NEXT: vmv.v.v v8, v16 1985; RV64-NEXT: ret 1986 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1987 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 1988 %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) 1989 ret <vscale x 8 x double> %v 1990} 1991 1992define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) { 1993; RV32-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: 1994; RV32: # %bb.0: 1995; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1996; RV32-NEXT: vzext.vf8 v24, v8 1997; RV32-NEXT: vsll.vi v8, v24, 3 1998; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1999; RV32-NEXT: vncvt.x.x.w v24, v8 2000; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2001; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t 2002; RV32-NEXT: vmv.v.v v8, v16 2003; RV32-NEXT: ret 2004; 2005; RV64-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64: 2006; RV64: # %bb.0: 2007; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2008; RV64-NEXT: vzext.vf8 v24, v8 2009; RV64-NEXT: vsll.vi v8, v24, 3 2010; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t 2011; RV64-NEXT: vmv.v.v v8, v16 2012; RV64-NEXT: ret 2013 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 2014 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2015 %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) 2016 ret <vscale x 8 x double> %v 2017} 2018 2019define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, 
<vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v8
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v8
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v8
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v8
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @mgather_baseidx_nxv8f64(double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
  ret <vscale x 8 x double> %v
}

declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @mgather_baseidx_nxv16i8(i8* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv16i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv16i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 16 x i8> %idxs
  %v = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0i8(<vscale x 16 x i8*> %ptrs, i32 2, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru)
  ret <vscale x 16 x i8> %v
}

declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)

define <vscale x 32 x i8> @mgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 2
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v10
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT:    vluxei32.v v14, (a0), v16, v0.t
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vmv1r.v v16, v0
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a2, a1, 3
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a2
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v13, (a0), v24, v0.t
; RV64-NEXT:    srli a1, a1, 2
; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v16, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v14, (a0), v16, v0.t
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a2
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v11
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v15, (a0), v16, v0.t
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, i32 2, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru)
  ret <vscale x 32 x i8> %v
}