1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32 4; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64 6 7declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>) 8 9define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) { 10; RV32-LABEL: mscatter_nxv1i8: 11; RV32: # %bb.0: 12; RV32-NEXT: vsetvli a0, zero, e8, mf8, ta, mu 13; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 14; RV32-NEXT: ret 15; 16; RV64-LABEL: mscatter_nxv1i8: 17; RV64: # %bb.0: 18; RV64-NEXT: vsetvli a0, zero, e8, mf8, ta, mu 19; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 20; RV64-NEXT: ret 21 call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m) 22 ret void 23} 24 25declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>) 26 27define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) { 28; RV32-LABEL: mscatter_nxv2i8: 29; RV32: # %bb.0: 30; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 31; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 32; RV32-NEXT: ret 33; 34; RV64-LABEL: mscatter_nxv2i8: 35; RV64: # %bb.0: 36; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 37; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 38; RV64-NEXT: ret 39 call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m) 40 ret void 41} 42 43define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) { 44; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8: 45; RV32: # %bb.0: 46; RV32-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 47; RV32-NEXT: vnsrl.wi v8, v8, 0 48; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 49; RV32-NEXT: ret 50; 51; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8: 52; RV64: # %bb.0: 53; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 54; RV64-NEXT: vnsrl.wi v8, v8, 0 55; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 56; RV64-NEXT: ret 57 %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8> 58 call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m) 59 ret void 60} 61 62define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) { 63; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8: 64; RV32: # %bb.0: 65; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 66; RV32-NEXT: vnsrl.wi v8, v8, 0 67; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 68; RV32-NEXT: vnsrl.wi v8, v8, 0 69; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 70; RV32-NEXT: ret 71; 72; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8: 73; RV64: # %bb.0: 74; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 75; RV64-NEXT: vnsrl.wi v8, v8, 0 76; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 77; RV64-NEXT: vnsrl.wi v8, v8, 0 78; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 79; RV64-NEXT: ret 80 %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8> 81 call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m) 82 ret void 83} 84 85define void 
@mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) { 86; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8: 87; RV32: # %bb.0: 88; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 89; RV32-NEXT: vnsrl.wi v11, v8, 0 90; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 91; RV32-NEXT: vnsrl.wi v8, v11, 0 92; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 93; RV32-NEXT: vnsrl.wi v8, v8, 0 94; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 95; RV32-NEXT: ret 96; 97; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8: 98; RV64: # %bb.0: 99; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 100; RV64-NEXT: vnsrl.wi v12, v8, 0 101; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 102; RV64-NEXT: vnsrl.wi v8, v12, 0 103; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 104; RV64-NEXT: vnsrl.wi v8, v8, 0 105; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 106; RV64-NEXT: ret 107 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8> 108 call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m) 109 ret void 110} 111 112declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>) 113 114define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) { 115; RV32-LABEL: mscatter_nxv4i8: 116; RV32: # %bb.0: 117; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 118; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 119; RV32-NEXT: ret 120; 121; RV64-LABEL: mscatter_nxv4i8: 122; RV64: # %bb.0: 123; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 124; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 125; RV64-NEXT: ret 126 call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m) 127 ret void 128} 129 130define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) { 131; RV32-LABEL: mscatter_truemask_nxv4i8: 132; RV32: # %bb.0: 133; RV32-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 134; RV32-NEXT: vsoxei32.v v8, (zero), v10 135; RV32-NEXT: ret 136; 137; RV64-LABEL: mscatter_truemask_nxv4i8: 138; RV64: # %bb.0: 139; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 140; RV64-NEXT: vsoxei64.v v8, (zero), v12 141; RV64-NEXT: ret 142 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 143 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 144 call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue) 145 ret void 146} 147 148define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) { 149; RV32-LABEL: mscatter_falsemask_nxv4i8: 150; RV32: # %bb.0: 151; RV32-NEXT: ret 152; 153; RV64-LABEL: mscatter_falsemask_nxv4i8: 154; RV64: # %bb.0: 155; RV64-NEXT: ret 156 call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer) 157 ret void 158} 159 160declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>) 161 162define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) { 163; RV32-LABEL: mscatter_nxv8i8: 164; RV32: # %bb.0: 165; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, mu 166; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 167; RV32-NEXT: ret 168; 169; RV64-LABEL: mscatter_nxv8i8: 170; RV64: # %bb.0: 171; RV64-NEXT: vsetvli a0, 
zero, e8, m1, ta, mu 172; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 173; RV64-NEXT: ret 174 call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m) 175 ret void 176} 177 178define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 179; RV32-LABEL: mscatter_baseidx_nxv8i8: 180; RV32: # %bb.0: 181; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 182; RV32-NEXT: vsext.vf4 v12, v9 183; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu 184; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 185; RV32-NEXT: ret 186; 187; RV64-LABEL: mscatter_baseidx_nxv8i8: 188; RV64: # %bb.0: 189; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 190; RV64-NEXT: vsext.vf8 v16, v9 191; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu 192; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 193; RV64-NEXT: ret 194 %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs 195 call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m) 196 ret void 197} 198 199declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>) 200 201define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) { 202; RV32-LABEL: mscatter_nxv1i16: 203; RV32: # %bb.0: 204; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 205; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 206; RV32-NEXT: ret 207; 208; RV64-LABEL: mscatter_nxv1i16: 209; RV64: # %bb.0: 210; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 211; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 212; RV64-NEXT: ret 213 call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m) 214 ret void 215} 216 217declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>) 218 219define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) { 220; RV32-LABEL: mscatter_nxv2i16: 221; RV32: # %bb.0: 222; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 223; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 224; RV32-NEXT: ret 225; 226; RV64-LABEL: mscatter_nxv2i16: 227; RV64: # %bb.0: 228; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 229; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 230; RV64-NEXT: ret 231 call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m) 232 ret void 233} 234 235define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) { 236; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16: 237; RV32: # %bb.0: 238; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 239; RV32-NEXT: vnsrl.wi v8, v8, 0 240; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 241; RV32-NEXT: ret 242; 243; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16: 244; RV64: # %bb.0: 245; RV64-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 246; RV64-NEXT: vnsrl.wi v8, v8, 0 247; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 248; RV64-NEXT: ret 249 %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16> 250 call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m) 251 ret void 252} 253 254define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) { 255; 
RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16: 256; RV32: # %bb.0: 257; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 258; RV32-NEXT: vnsrl.wi v11, v8, 0 259; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 260; RV32-NEXT: vnsrl.wi v8, v11, 0 261; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 262; RV32-NEXT: ret 263; 264; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16: 265; RV64: # %bb.0: 266; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 267; RV64-NEXT: vnsrl.wi v12, v8, 0 268; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 269; RV64-NEXT: vnsrl.wi v8, v12, 0 270; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 271; RV64-NEXT: ret 272 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16> 273 call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m) 274 ret void 275} 276 277declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>) 278 279define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) { 280; RV32-LABEL: mscatter_nxv4i16: 281; RV32: # %bb.0: 282; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 283; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 284; RV32-NEXT: ret 285; 286; RV64-LABEL: mscatter_nxv4i16: 287; RV64: # %bb.0: 288; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 289; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 290; RV64-NEXT: ret 291 call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m) 292 ret void 293} 294 295define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) { 296; RV32-LABEL: mscatter_truemask_nxv4i16: 297; RV32: # %bb.0: 298; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 299; RV32-NEXT: vsoxei32.v v8, (zero), v10 300; RV32-NEXT: ret 301; 302; RV64-LABEL: mscatter_truemask_nxv4i16: 303; RV64: # %bb.0: 304; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 305; RV64-NEXT: vsoxei64.v v8, (zero), v12 306; RV64-NEXT: ret 307 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 308 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 309 call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue) 310 ret void 311} 312 313define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) { 314; RV32-LABEL: mscatter_falsemask_nxv4i16: 315; RV32: # %bb.0: 316; RV32-NEXT: ret 317; 318; RV64-LABEL: mscatter_falsemask_nxv4i16: 319; RV64: # %bb.0: 320; RV64-NEXT: ret 321 call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer) 322 ret void 323} 324 325declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>) 326 327define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) { 328; RV32-LABEL: mscatter_nxv8i16: 329; RV32: # %bb.0: 330; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu 331; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 332; RV32-NEXT: ret 333; 334; RV64-LABEL: mscatter_nxv8i16: 335; RV64: # %bb.0: 336; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu 337; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 338; RV64-NEXT: ret 339 call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m) 
340 ret void 341} 342 343define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 344; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16: 345; RV32: # %bb.0: 346; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 347; RV32-NEXT: vsext.vf4 v12, v10 348; RV32-NEXT: vadd.vv v12, v12, v12 349; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 350; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 351; RV32-NEXT: ret 352; 353; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16: 354; RV64: # %bb.0: 355; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 356; RV64-NEXT: vsext.vf8 v16, v10 357; RV64-NEXT: vadd.vv v16, v16, v16 358; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 359; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 360; RV64-NEXT: ret 361 %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs 362 call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m) 363 ret void 364} 365 366define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 367; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16: 368; RV32: # %bb.0: 369; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 370; RV32-NEXT: vsext.vf4 v12, v10 371; RV32-NEXT: vadd.vv v12, v12, v12 372; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 373; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 374; RV32-NEXT: ret 375; 376; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16: 377; RV64: # %bb.0: 378; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 379; RV64-NEXT: vsext.vf8 v16, v10 380; RV64-NEXT: vadd.vv v16, v16, v16 381; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 382; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 383; RV64-NEXT: ret 384 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 385 %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs 386 call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m) 387 ret void 388} 389 390define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 391; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: 392; RV32: # %bb.0: 393; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 394; RV32-NEXT: vzext.vf4 v12, v10 395; RV32-NEXT: vadd.vv v12, v12, v12 396; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 397; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 398; RV32-NEXT: ret 399; 400; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16: 401; RV64: # %bb.0: 402; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 403; RV64-NEXT: vzext.vf8 v16, v10 404; RV64-NEXT: vadd.vv v16, v16, v16 405; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 406; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 407; RV64-NEXT: ret 408 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 409 %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs 410 call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m) 411 ret void 412} 413 414define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 415; RV32-LABEL: mscatter_baseidx_nxv8i16: 416; RV32: # %bb.0: 417; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 418; RV32-NEXT: vsext.vf2 v12, v10 419; RV32-NEXT: vadd.vv v12, v12, v12 420; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 421; RV32-NEXT: 
vsoxei32.v v8, (a0), v12, v0.t 422; RV32-NEXT: ret 423; 424; RV64-LABEL: mscatter_baseidx_nxv8i16: 425; RV64: # %bb.0: 426; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 427; RV64-NEXT: vsext.vf4 v16, v10 428; RV64-NEXT: vadd.vv v16, v16, v16 429; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 430; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 431; RV64-NEXT: ret 432 %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs 433 call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m) 434 ret void 435} 436 437declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>) 438 439define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) { 440; RV32-LABEL: mscatter_nxv1i32: 441; RV32: # %bb.0: 442; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 443; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 444; RV32-NEXT: ret 445; 446; RV64-LABEL: mscatter_nxv1i32: 447; RV64: # %bb.0: 448; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 449; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 450; RV64-NEXT: ret 451 call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m) 452 ret void 453} 454 455declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>) 456 457define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) { 458; RV32-LABEL: mscatter_nxv2i32: 459; RV32: # %bb.0: 460; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 461; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 462; RV32-NEXT: ret 463; 464; RV64-LABEL: mscatter_nxv2i32: 465; RV64: # %bb.0: 466; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 467; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 468; RV64-NEXT: ret 469 call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m) 470 ret void 471} 472 473define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) { 474; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32: 475; RV32: # %bb.0: 476; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 477; RV32-NEXT: vnsrl.wi v11, v8, 0 478; RV32-NEXT: vsoxei32.v v11, (zero), v10, v0.t 479; RV32-NEXT: ret 480; 481; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32: 482; RV64: # %bb.0: 483; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 484; RV64-NEXT: vnsrl.wi v12, v8, 0 485; RV64-NEXT: vsoxei64.v v12, (zero), v10, v0.t 486; RV64-NEXT: ret 487 %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32> 488 call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m) 489 ret void 490} 491 492declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>) 493 494define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) { 495; RV32-LABEL: mscatter_nxv4i32: 496; RV32: # %bb.0: 497; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 498; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 499; RV32-NEXT: ret 500; 501; RV64-LABEL: mscatter_nxv4i32: 502; RV64: # %bb.0: 503; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 504; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 505; RV64-NEXT: ret 506 call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, 
<vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m) 507 ret void 508} 509 510define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) { 511; RV32-LABEL: mscatter_truemask_nxv4i32: 512; RV32: # %bb.0: 513; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 514; RV32-NEXT: vsoxei32.v v8, (zero), v10 515; RV32-NEXT: ret 516; 517; RV64-LABEL: mscatter_truemask_nxv4i32: 518; RV64: # %bb.0: 519; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 520; RV64-NEXT: vsoxei64.v v8, (zero), v12 521; RV64-NEXT: ret 522 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 523 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 524 call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue) 525 ret void 526} 527 528define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) { 529; RV32-LABEL: mscatter_falsemask_nxv4i32: 530; RV32: # %bb.0: 531; RV32-NEXT: ret 532; 533; RV64-LABEL: mscatter_falsemask_nxv4i32: 534; RV64: # %bb.0: 535; RV64-NEXT: ret 536 call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer) 537 ret void 538} 539 540declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>) 541 542define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) { 543; RV32-LABEL: mscatter_nxv8i32: 544; RV32: # %bb.0: 545; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu 546; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 547; RV32-NEXT: ret 548; 549; RV64-LABEL: mscatter_nxv8i32: 550; RV64: # %bb.0: 551; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, mu 552; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 553; RV64-NEXT: ret 554 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 555 ret void 556} 557 558define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 559; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32: 560; RV32: # %bb.0: 561; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 562; RV32-NEXT: vsext.vf4 v16, v12 563; RV32-NEXT: vsll.vi v12, v16, 2 564; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 565; RV32-NEXT: ret 566; 567; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32: 568; RV64: # %bb.0: 569; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 570; RV64-NEXT: vsext.vf8 v16, v12 571; RV64-NEXT: vsll.vi v16, v16, 2 572; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 573; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 574; RV64-NEXT: ret 575 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs 576 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 577 ret void 578} 579 580define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 581; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32: 582; RV32: # %bb.0: 583; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 584; RV32-NEXT: vsext.vf4 v16, v12 585; RV32-NEXT: vsll.vi v12, v16, 2 586; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 587; RV32-NEXT: ret 588; 589; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32: 590; RV64: # %bb.0: 591; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 592; RV64-NEXT: vsext.vf8 v16, 
v12 593; RV64-NEXT: vsll.vi v16, v16, 2 594; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 595; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 596; RV64-NEXT: ret 597 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 598 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs 599 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 600 ret void 601} 602 603define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 604; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: 605; RV32: # %bb.0: 606; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 607; RV32-NEXT: vzext.vf4 v16, v12 608; RV32-NEXT: vsll.vi v12, v16, 2 609; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 610; RV32-NEXT: ret 611; 612; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32: 613; RV64: # %bb.0: 614; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 615; RV64-NEXT: vzext.vf8 v16, v12 616; RV64-NEXT: vsll.vi v16, v16, 2 617; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 618; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 619; RV64-NEXT: ret 620 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 621 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs 622 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 623 ret void 624} 625 626define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 627; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: 628; RV32: # %bb.0: 629; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 630; RV32-NEXT: vsext.vf2 v16, v12 631; RV32-NEXT: vsll.vi v12, v16, 2 632; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 633; RV32-NEXT: ret 634; 635; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32: 636; RV64: # %bb.0: 637; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 638; RV64-NEXT: vsext.vf4 v16, v12 639; RV64-NEXT: vsll.vi v16, v16, 2 640; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 641; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 642; RV64-NEXT: ret 643 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs 644 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 645 ret void 646} 647 648define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 649; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: 650; RV32: # %bb.0: 651; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 652; RV32-NEXT: vsext.vf2 v16, v12 653; RV32-NEXT: vsll.vi v12, v16, 2 654; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 655; RV32-NEXT: ret 656; 657; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32: 658; RV64: # %bb.0: 659; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 660; RV64-NEXT: vsext.vf4 v16, v12 661; RV64-NEXT: vsll.vi v16, v16, 2 662; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 663; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 664; RV64-NEXT: ret 665 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 666 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs 667 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 668 ret void 669} 670 671define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x 
i16> %idxs, <vscale x 8 x i1> %m) { 672; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: 673; RV32: # %bb.0: 674; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 675; RV32-NEXT: vzext.vf2 v16, v12 676; RV32-NEXT: vsll.vi v12, v16, 2 677; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 678; RV32-NEXT: ret 679; 680; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32: 681; RV64: # %bb.0: 682; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 683; RV64-NEXT: vzext.vf4 v16, v12 684; RV64-NEXT: vsll.vi v16, v16, 2 685; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 686; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 687; RV64-NEXT: ret 688 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 689 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs 690 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 691 ret void 692} 693 694define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 695; RV32-LABEL: mscatter_baseidx_nxv8i32: 696; RV32: # %bb.0: 697; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 698; RV32-NEXT: vsll.vi v12, v12, 2 699; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 700; RV32-NEXT: ret 701; 702; RV64-LABEL: mscatter_baseidx_nxv8i32: 703; RV64: # %bb.0: 704; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 705; RV64-NEXT: vsext.vf2 v16, v12 706; RV64-NEXT: vsll.vi v16, v16, 2 707; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 708; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 709; RV64-NEXT: ret 710 %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs 711 call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m) 712 ret void 713} 714 715declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>) 716 717define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) { 718; RV32-LABEL: mscatter_nxv1i64: 719; RV32: # %bb.0: 720; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu 721; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 722; RV32-NEXT: ret 723; 724; RV64-LABEL: mscatter_nxv1i64: 725; RV64: # %bb.0: 726; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu 727; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 728; RV64-NEXT: ret 729 call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m) 730 ret void 731} 732 733declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>) 734 735define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) { 736; RV32-LABEL: mscatter_nxv2i64: 737; RV32: # %bb.0: 738; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu 739; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 740; RV32-NEXT: ret 741; 742; RV64-LABEL: mscatter_nxv2i64: 743; RV64: # %bb.0: 744; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu 745; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 746; RV64-NEXT: ret 747 call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m) 748 ret void 749} 750 751declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>) 752 753define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) { 754; RV32-LABEL: mscatter_nxv4i64: 
755; RV32: # %bb.0: 756; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 757; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 758; RV32-NEXT: ret 759; 760; RV64-LABEL: mscatter_nxv4i64: 761; RV64: # %bb.0: 762; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 763; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 764; RV64-NEXT: ret 765 call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m) 766 ret void 767} 768 769define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) { 770; RV32-LABEL: mscatter_truemask_nxv4i64: 771; RV32: # %bb.0: 772; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 773; RV32-NEXT: vsoxei32.v v8, (zero), v12 774; RV32-NEXT: ret 775; 776; RV64-LABEL: mscatter_truemask_nxv4i64: 777; RV64: # %bb.0: 778; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 779; RV64-NEXT: vsoxei64.v v8, (zero), v12 780; RV64-NEXT: ret 781 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 782 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 783 call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue) 784 ret void 785} 786 787define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) { 788; RV32-LABEL: mscatter_falsemask_nxv4i64: 789; RV32: # %bb.0: 790; RV32-NEXT: ret 791; 792; RV64-LABEL: mscatter_falsemask_nxv4i64: 793; RV64: # %bb.0: 794; RV64-NEXT: ret 795 call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer) 796 ret void 797} 798 799declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>) 800 801define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) { 802; RV32-LABEL: mscatter_nxv8i64: 803; RV32: # %bb.0: 804; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu 805; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t 806; RV32-NEXT: ret 807; 808; RV64-LABEL: mscatter_nxv8i64: 809; RV64: # %bb.0: 810; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 811; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 812; RV64-NEXT: ret 813 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 814 ret void 815} 816 817define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 818; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64: 819; RV32: # %bb.0: 820; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 821; RV32-NEXT: vsext.vf4 v20, v16 822; RV32-NEXT: vsll.vi v16, v20, 3 823; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 824; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 825; RV32-NEXT: ret 826; 827; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64: 828; RV64: # %bb.0: 829; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 830; RV64-NEXT: vsext.vf8 v24, v16 831; RV64-NEXT: vsll.vi v16, v24, 3 832; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 833; RV64-NEXT: ret 834 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs 835 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 836 ret void 837} 838 839define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 840; RV32-LABEL: 
mscatter_baseidx_sext_nxv8i8_nxv8i64: 841; RV32: # %bb.0: 842; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 843; RV32-NEXT: vsext.vf8 v24, v16 844; RV32-NEXT: vsll.vi v16, v24, 3 845; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 846; RV32-NEXT: vnsrl.wi v24, v16, 0 847; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 848; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 849; RV32-NEXT: ret 850; 851; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: 852; RV64: # %bb.0: 853; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 854; RV64-NEXT: vsext.vf8 v24, v16 855; RV64-NEXT: vsll.vi v16, v24, 3 856; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 857; RV64-NEXT: ret 858 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 859 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 860 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 861 ret void 862} 863 864define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 865; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: 866; RV32: # %bb.0: 867; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 868; RV32-NEXT: vzext.vf8 v24, v16 869; RV32-NEXT: vsll.vi v16, v24, 3 870; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 871; RV32-NEXT: vnsrl.wi v24, v16, 0 872; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 873; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 874; RV32-NEXT: ret 875; 876; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64: 877; RV64: # %bb.0: 878; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 879; RV64-NEXT: vzext.vf8 v24, v16 880; RV64-NEXT: vsll.vi v16, v24, 3 881; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 882; RV64-NEXT: ret 883 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 884 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 885 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 886 ret void 887} 888 889define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 890; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: 891; RV32: # %bb.0: 892; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 893; RV32-NEXT: vsext.vf2 v20, v16 894; RV32-NEXT: vsll.vi v16, v20, 3 895; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 896; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 897; RV32-NEXT: ret 898; 899; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: 900; RV64: # %bb.0: 901; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 902; RV64-NEXT: vsext.vf4 v24, v16 903; RV64-NEXT: vsll.vi v16, v24, 3 904; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 905; RV64-NEXT: ret 906 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs 907 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 908 ret void 909} 910 911define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 912; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 913; RV32: # %bb.0: 914; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 915; RV32-NEXT: vsext.vf4 v24, v16 916; RV32-NEXT: vsll.vi v16, v24, 3 917; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 918; RV32-NEXT: vnsrl.wi v24, v16, 0 919; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 920; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 921; RV32-NEXT: ret 
922; 923; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: 924; RV64: # %bb.0: 925; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 926; RV64-NEXT: vsext.vf4 v24, v16 927; RV64-NEXT: vsll.vi v16, v24, 3 928; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 929; RV64-NEXT: ret 930 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 931 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 932 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 933 ret void 934} 935 936define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 937; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: 938; RV32: # %bb.0: 939; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 940; RV32-NEXT: vzext.vf4 v24, v16 941; RV32-NEXT: vsll.vi v16, v24, 3 942; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 943; RV32-NEXT: vnsrl.wi v24, v16, 0 944; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 945; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 946; RV32-NEXT: ret 947; 948; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64: 949; RV64: # %bb.0: 950; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 951; RV64-NEXT: vzext.vf4 v24, v16 952; RV64-NEXT: vsll.vi v16, v24, 3 953; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 954; RV64-NEXT: ret 955 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 956 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 957 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 958 ret void 959} 960 961define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 962; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 963; RV32: # %bb.0: 964; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 965; RV32-NEXT: vsll.vi v16, v16, 3 966; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 967; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 968; RV32-NEXT: ret 969; 970; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: 971; RV64: # %bb.0: 972; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 973; RV64-NEXT: vsext.vf2 v24, v16 974; RV64-NEXT: vsll.vi v16, v24, 3 975; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 976; RV64-NEXT: ret 977 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs 978 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 979 ret void 980} 981 982define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 983; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 984; RV32: # %bb.0: 985; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 986; RV32-NEXT: vsext.vf2 v24, v16 987; RV32-NEXT: vsll.vi v16, v24, 3 988; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 989; RV32-NEXT: vnsrl.wi v24, v16, 0 990; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 991; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 992; RV32-NEXT: ret 993; 994; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: 995; RV64: # %bb.0: 996; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 997; RV64-NEXT: vsext.vf2 v24, v16 998; RV64-NEXT: vsll.vi v16, v24, 3 999; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1000; RV64-NEXT: ret 1001 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1002 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 
1003 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 1004 ret void 1005} 1006 1007define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) { 1008; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 1009; RV32: # %bb.0: 1010; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1011; RV32-NEXT: vzext.vf2 v24, v16 1012; RV32-NEXT: vsll.vi v16, v24, 3 1013; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1014; RV32-NEXT: vnsrl.wi v24, v16, 0 1015; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1016; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1017; RV32-NEXT: ret 1018; 1019; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: 1020; RV64: # %bb.0: 1021; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1022; RV64-NEXT: vzext.vf2 v24, v16 1023; RV64-NEXT: vsll.vi v16, v24, 3 1024; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1025; RV64-NEXT: ret 1026 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1027 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1028 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 1029 ret void 1030} 1031 1032define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) { 1033; RV32-LABEL: mscatter_baseidx_nxv8i64: 1034; RV32: # %bb.0: 1035; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1036; RV32-NEXT: vsll.vi v16, v16, 3 1037; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1038; RV32-NEXT: vnsrl.wi v24, v16, 0 1039; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1040; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1041; RV32-NEXT: ret 1042; 1043; RV64-LABEL: mscatter_baseidx_nxv8i64: 1044; RV64: # %bb.0: 1045; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1046; RV64-NEXT: vsll.vi v16, v16, 3 1047; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1048; RV64-NEXT: ret 1049 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs 1050 call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m) 1051 ret void 1052} 1053 1054declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>) 1055 1056define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) { 1057; RV32-LABEL: mscatter_nxv1f16: 1058; RV32: # %bb.0: 1059; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1060; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1061; RV32-NEXT: ret 1062; 1063; RV64-LABEL: mscatter_nxv1f16: 1064; RV64: # %bb.0: 1065; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1066; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1067; RV64-NEXT: ret 1068 call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m) 1069 ret void 1070} 1071 1072declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>) 1073 1074define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) { 1075; RV32-LABEL: mscatter_nxv2f16: 1076; RV32: # %bb.0: 1077; RV32-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1078; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1079; RV32-NEXT: ret 1080; 1081; RV64-LABEL: mscatter_nxv2f16: 1082; RV64: # %bb.0: 1083; RV64-NEXT: vsetvli a0, zero, e16, mf2, 
ta, mu 1084; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1085; RV64-NEXT: ret 1086 call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m) 1087 ret void 1088} 1089 1090declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>) 1091 1092define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) { 1093; RV32-LABEL: mscatter_nxv4f16: 1094; RV32: # %bb.0: 1095; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1096; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1097; RV32-NEXT: ret 1098; 1099; RV64-LABEL: mscatter_nxv4f16: 1100; RV64: # %bb.0: 1101; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1102; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1103; RV64-NEXT: ret 1104 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m) 1105 ret void 1106} 1107 1108define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) { 1109; RV32-LABEL: mscatter_truemask_nxv4f16: 1110; RV32: # %bb.0: 1111; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1112; RV32-NEXT: vsoxei32.v v8, (zero), v10 1113; RV32-NEXT: ret 1114; 1115; RV64-LABEL: mscatter_truemask_nxv4f16: 1116; RV64: # %bb.0: 1117; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1118; RV64-NEXT: vsoxei64.v v8, (zero), v12 1119; RV64-NEXT: ret 1120 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 1121 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 1122 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue) 1123 ret void 1124} 1125 1126define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) { 1127; RV32-LABEL: mscatter_falsemask_nxv4f16: 1128; RV32: # %bb.0: 1129; RV32-NEXT: ret 1130; 1131; RV64-LABEL: mscatter_falsemask_nxv4f16: 1132; RV64: # %bb.0: 1133; RV64-NEXT: ret 1134 call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer) 1135 ret void 1136} 1137 1138declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>) 1139 1140define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) { 1141; RV32-LABEL: mscatter_nxv8f16: 1142; RV32: # %bb.0: 1143; RV32-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1144; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1145; RV32-NEXT: ret 1146; 1147; RV64-LABEL: mscatter_nxv8f16: 1148; RV64: # %bb.0: 1149; RV64-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1150; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1151; RV64-NEXT: ret 1152 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1153 ret void 1154} 1155 1156define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1157; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16: 1158; RV32: # %bb.0: 1159; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1160; RV32-NEXT: vsext.vf4 v12, v10 1161; RV32-NEXT: vadd.vv v12, v12, v12 1162; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1163; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1164; RV32-NEXT: ret 1165; 1166; RV64-LABEL: 
mscatter_baseidx_nxv8i8_nxv8f16: 1167; RV64: # %bb.0: 1168; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1169; RV64-NEXT: vsext.vf8 v16, v10 1170; RV64-NEXT: vadd.vv v16, v16, v16 1171; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1172; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1173; RV64-NEXT: ret 1174 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs 1175 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1176 ret void 1177} 1178 1179define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1180; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: 1181; RV32: # %bb.0: 1182; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1183; RV32-NEXT: vsext.vf4 v12, v10 1184; RV32-NEXT: vadd.vv v12, v12, v12 1185; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1186; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1187; RV32-NEXT: ret 1188; 1189; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16: 1190; RV64: # %bb.0: 1191; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1192; RV64-NEXT: vsext.vf8 v16, v10 1193; RV64-NEXT: vadd.vv v16, v16, v16 1194; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1195; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1196; RV64-NEXT: ret 1197 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1198 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1199 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1200 ret void 1201} 1202 1203define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1204; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: 1205; RV32: # %bb.0: 1206; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1207; RV32-NEXT: vzext.vf4 v12, v10 1208; RV32-NEXT: vadd.vv v12, v12, v12 1209; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1210; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1211; RV32-NEXT: ret 1212; 1213; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16: 1214; RV64: # %bb.0: 1215; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1216; RV64-NEXT: vzext.vf8 v16, v10 1217; RV64-NEXT: vadd.vv v16, v16, v16 1218; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1219; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1220; RV64-NEXT: ret 1221 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1222 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1223 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1224 ret void 1225} 1226 1227define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) { 1228; RV32-LABEL: mscatter_baseidx_nxv8f16: 1229; RV32: # %bb.0: 1230; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1231; RV32-NEXT: vsext.vf2 v12, v10 1232; RV32-NEXT: vadd.vv v12, v12, v12 1233; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1234; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1235; RV32-NEXT: ret 1236; 1237; RV64-LABEL: mscatter_baseidx_nxv8f16: 1238; RV64: # %bb.0: 1239; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1240; RV64-NEXT: vsext.vf4 v16, v10 1241; RV64-NEXT: vadd.vv v16, v16, v16 1242; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1243; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1244; RV64-NEXT: ret 1245 %ptrs = getelementptr inbounds 
half, half* %base, <vscale x 8 x i16> %idxs 1246 call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m) 1247 ret void 1248} 1249 1250declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>) 1251 1252define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) { 1253; RV32-LABEL: mscatter_nxv1f32: 1254; RV32: # %bb.0: 1255; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1256; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1257; RV32-NEXT: ret 1258; 1259; RV64-LABEL: mscatter_nxv1f32: 1260; RV64: # %bb.0: 1261; RV64-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1262; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1263; RV64-NEXT: ret 1264 call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m) 1265 ret void 1266} 1267 1268declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>) 1269 1270define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) { 1271; RV32-LABEL: mscatter_nxv2f32: 1272; RV32: # %bb.0: 1273; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1274; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1275; RV32-NEXT: ret 1276; 1277; RV64-LABEL: mscatter_nxv2f32: 1278; RV64: # %bb.0: 1279; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1280; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1281; RV64-NEXT: ret 1282 call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m) 1283 ret void 1284} 1285 1286declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>) 1287 1288define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) { 1289; RV32-LABEL: mscatter_nxv4f32: 1290; RV32: # %bb.0: 1291; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1292; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1293; RV32-NEXT: ret 1294; 1295; RV64-LABEL: mscatter_nxv4f32: 1296; RV64: # %bb.0: 1297; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1298; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1299; RV64-NEXT: ret 1300 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m) 1301 ret void 1302} 1303 1304define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) { 1305; RV32-LABEL: mscatter_truemask_nxv4f32: 1306; RV32: # %bb.0: 1307; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1308; RV32-NEXT: vsoxei32.v v8, (zero), v10 1309; RV32-NEXT: ret 1310; 1311; RV64-LABEL: mscatter_truemask_nxv4f32: 1312; RV64: # %bb.0: 1313; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1314; RV64-NEXT: vsoxei64.v v8, (zero), v12 1315; RV64-NEXT: ret 1316 %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0 1317 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer 1318 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue) 1319 ret void 1320} 1321 1322define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) { 1323; RV32-LABEL: mscatter_falsemask_nxv4f32: 1324; RV32: # %bb.0: 1325; RV32-NEXT: ret 1326; 1327; RV64-LABEL: 
mscatter_falsemask_nxv4f32: 1328; RV64: # %bb.0: 1329; RV64-NEXT: ret 1330 call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer) 1331 ret void 1332} 1333 1334declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>) 1335 1336define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) { 1337; RV32-LABEL: mscatter_nxv8f32: 1338; RV32: # %bb.0: 1339; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1340; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1341; RV32-NEXT: ret 1342; 1343; RV64-LABEL: mscatter_nxv8f32: 1344; RV64: # %bb.0: 1345; RV64-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1346; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1347; RV64-NEXT: ret 1348 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1349 ret void 1350} 1351 1352define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1353; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1354; RV32: # %bb.0: 1355; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1356; RV32-NEXT: vsext.vf4 v16, v12 1357; RV32-NEXT: vsll.vi v12, v16, 2 1358; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1359; RV32-NEXT: ret 1360; 1361; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32: 1362; RV64: # %bb.0: 1363; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1364; RV64-NEXT: vsext.vf8 v16, v12 1365; RV64-NEXT: vsll.vi v16, v16, 2 1366; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1367; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1368; RV64-NEXT: ret 1369 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs 1370 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1371 ret void 1372} 1373 1374define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1375; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1376; RV32: # %bb.0: 1377; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1378; RV32-NEXT: vsext.vf4 v16, v12 1379; RV32-NEXT: vsll.vi v12, v16, 2 1380; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1381; RV32-NEXT: ret 1382; 1383; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32: 1384; RV64: # %bb.0: 1385; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1386; RV64-NEXT: vsext.vf8 v16, v12 1387; RV64-NEXT: vsll.vi v16, v16, 2 1388; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1389; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1390; RV64-NEXT: ret 1391 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1392 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1393 call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m) 1394 ret void 1395} 1396 1397define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) { 1398; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: 1399; RV32: # %bb.0: 1400; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu 1401; RV32-NEXT: vzext.vf4 v16, v12 1402; RV32-NEXT: vsll.vi v12, v16, 2 1403; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1404; RV32-NEXT: ret 1405; 1406; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32: 1407; RV64: # %bb.0: 1408; 
define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

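; i32 indices already match the RV32 pointer width, so RV32 shifts them in place; RV64 still
; sign-extends them to 64 bits first.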
define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

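; f64 scatters store with SEW=64, but RV32 keeps using 32-bit index vectors (vsoxei32) since its
; pointers are only 32 bits wide.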
declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_falsemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_falsemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

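; With 8-byte elements the index scale becomes a shift by 3. Where the IR extends the indices to
; i64 explicitly, RV32 computes the 64-bit offsets and then narrows them back with vnsrl.wi so the
; 32-bit indexed store can still be used.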
define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

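; As with f32, i32 indices are already pointer-width on RV32; only the forms explicitly extended
; to i64 take the widen-shift-narrow path there.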
define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

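; i64 indices are native on RV64; RV32 shifts them at e64 and truncates the resulting offsets to
; 32 bits before the store.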
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)

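; nxv16f64 exceeds the largest (LMUL=8) register group, so the scatter is split into two halves.
; The mask for the high half is extracted with vslidedown; RV64 additionally spills one value
; register group to the stack, since the two value and two pointer operands need more vector
; registers than are available alongside the mask in v0.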
define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v24, (a0)
; RV32-NEXT:    vl4re32.v v28, (a1)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v16, (a1)
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a0
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.experimental.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v2, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v2
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v2, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v2
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v8, v3
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

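; The split baseidx forms scatter the low half first. On RV32 the high half's offsets are the
; upper part of the same extended register group (v28); on RV64 they are re-extended from the
; upper index registers (v3 above, v6 below).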
define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re16.v v4, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.experimental.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}