; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

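; Comment on the tests that follow (an observation, not autogenerated): the
; truncating-store cases narrow the scattered value before the indexed store,
; one vncvt.x.x.w per halving of the element width, so an i32 source takes two
; steps (e16, then e8) and an i64 source takes three.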
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

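; Comment on the base-plus-vector-of-indices form below (an observation, not
; autogenerated): the i8 indexes are sign-extended to pointer width first
; (vsext.vf4 to e32 on RV32, vsext.vf8 to e64 on RV64) and used as byte
; offsets directly, since i8 elements need no scaling.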
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

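; Comment on the i16 baseidx tests below (an observation, not autogenerated):
; for 2-byte elements the extended indexes are scaled by the element size with
; vadd.vv (x + x) rather than a shift.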
define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

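; Comment on the mask special cases below (an observation, not autogenerated):
; as in the nxv4i8/nxv4i16 variants above, a known all-ones mask is emitted as
; an unmasked store (no v0.t) and a known all-zeros mask folds the scatter
; away entirely.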
define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

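; Comment on the i32 baseidx tests below (an observation, not autogenerated):
; for 4-byte elements the index scaling switches to a shift (vsll.vi ..., 2).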
define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

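; Comment on the i64 tests below (an observation, not autogenerated): even for
; 64-bit elements, RV32 pointers are only 32 bits wide, so RV32 keeps using
; vsoxei32 with an EEW=32 offset vector under the e64 vsetvli.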
declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

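; Comment on the sext/zext baseidx cases below (an observation, not
; autogenerated): when the IR widens the indexes to i64 itself, RV32 computes
; the 64-bit offsets and then narrows them back with vncvt.x.x.w so the store
; can still use 32-bit offsets.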
define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf4 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf2 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

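; Comment on the floating-point tests (an observation, not autogenerated):
; they lower exactly like the integer scatters of the same element width; the
; f16 cases depend on the experimental-zvfh feature enabled in the RUN lines.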
declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> %mtrue)
  ret void
}

define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vzext.vf2 v16, v12
; RV32-NEXT: vsll.vi v12, v16, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf4 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsll.vi v12, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf2 v16, v12
; RV64-NEXT: vsll.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}
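
; Scatters of f64 elements. The data EEW is 64, with 32-bit index vectors
; (vsoxei32) on RV32 and 64-bit index vectors (vsoxei64) on RV64.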
declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v12
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> %mtrue)
  ret void
}
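
; A scatter with an all-zero mask stores nothing, so no code is emitted.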
define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf4 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsext.vf8 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
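
; Same as the sext case above, but zero-extending the i8 indices. When the
; extended indices are computed at e64 on RV32, they are narrowed back to e32
; with vncvt so that vsoxei32 can still be used.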
define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vzext.vf8 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf8 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsext.vf2 v20, v16
; RV32-NEXT: vsll.vi v16, v20, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vzext.vf4 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf4 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
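
; i32 index vectors. On RV32 these already match the index EEW, so only the
; scaling shift is needed; the sext/zext variants still round-trip through e64
; and narrow back with vncvt.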
define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsext.vf2 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vzext.vf2 v24, v16
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}
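
; The index vector is already i64, so no extension is needed; RV32 scales the
; indices at e64 and truncates them to e32 for the index operand.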
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsll.vi v16, v16, 3
; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; RV32-NEXT: vncvt.x.x.w v24, v16
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 8 x double*>, i64)
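
; nxv16f64 is twice the largest legal vector type, so the scatter is split
; into two m8 halves; the mask bits for the second half are shifted into v0
; with a vslidedown at e8.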
define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x double*> %ptrs0, <vscale x 8 x double*> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re32.v v24, (a0)
; RV32-NEXT: vl4re32.v v28, (a1)
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: srli a0, a0, 3
; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: sub sp, sp, a2
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV64-NEXT: vl8re64.v v16, (a1)
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: srli a0, a0, 3
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v0, a0
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %p0 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> undef, <vscale x 8 x double*> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x double*> @llvm.vector.insert.nxv8p0f64.nxv16p0f64(<vscale x 16 x double*> %p0, <vscale x 8 x double*> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl2r.v v2, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf4 v24, v2
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl2r.v v2, (a1)
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v24, v2
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf8 v8, v3
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}
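
; The same split with i16 indices; RV32 extends the whole index vector at e32,
; while RV64 extends each m2 half (v4 and v6) to e64 separately.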
define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
; RV32-NEXT: vl4re16.v v4, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT: vsext.vf2 v24, v4
; RV32-NEXT: vsll.vi v24, v24, 3
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: srli a1, a1, 3
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vx v0, v0, a1
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64: # %bb.0:
; RV64-NEXT: vl4re16.v v4, (a1)
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v24, v4
; RV64-NEXT: vsll.vi v24, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: srli a1, a1, 3
; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT: vsext.vf4 v8, v6
; RV64-NEXT: vsll.vi v8, v8, 3
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT: ret
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %v1, <vscale x 16 x double*> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}