; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

; Lowering tests for the @llvm.vp.scatter.* intrinsics. Throughout this file
; the scatter becomes a single indexed unordered store: vsoxei32.v on RV32
; (32-bit pointers in the index vector) and vsoxei64.v on RV64 (64-bit
; pointers), with the EVL operand fed to vsetvli and the mask as v0.t.
; NOTE(review): the CHECK lines are autogenerated -- regenerate with
; update_llc_test_checks.py rather than editing them by hand.

declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> %val, <vscale x 1 x i8*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; Truncating scatter: the value operand is narrowed one element-width step at
; a time with vncvt.x.x.w before the indexed store.
define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; i32 -> i8 truncstore needs two vncvt steps (e16 then e8).
define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

; i64 -> i8 truncstore: three vncvt steps (e32, e16, e8).
define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %tval, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

; All-true mask (splat of i1 1): the store is emitted without a v0.t operand.
define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> %val, <vscale x 4 x i8*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; baseidx form: the GEP index vector is extended to the target pointer width
; (vsext.vf4 to 32 bits on RV32, vsext.vf8 to 64 bits on RV64) before being
; used as the index operand of the store.
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, i8* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> %val, <vscale x 8 x i8*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> %val, <vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v8
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v8
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV32-NEXT:    vncvt.x.x.w v8, v11
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; RV64-NEXT:    vncvt.x.x.w v8, v12
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> %tval, <vscale x 2 x i16*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> %val, <vscale x 4 x i16*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; baseidx forms for i16 elements: the extended indices are scaled by the
; 2-byte element size with vadd.vv (index + index == index * 2).
define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, i16* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, i16* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> %val, <vscale x 8 x i16*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> %val, <vscale x 1 x i32*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV32-NEXT:    vncvt.x.x.w v11, v8
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
; RV64-NEXT:    vncvt.x.x.w v12, v8
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> %tval, <vscale x 2 x i32*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %val, <vscale x 4 x i32*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; baseidx forms for i32 elements: indices scaled by 4 with vsll.vi ..., 2.
define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; Index type already matches XLEN on RV32, so no extend is needed there.
define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, i32* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, i32* %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> %val, <vscale x 8 x i32*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> %val, <vscale x 1 x i64*> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> %val, <vscale x 2 x i64*> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  call void @llvm.vp.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> %val, <vscale x 4 x i64*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

; baseidx forms for i64 elements: indices scaled by 8 with vsll.vi ..., 3.
; In the sext/zext-to-i64 cases on RV32 the 64-bit scaled indices are
; narrowed back to 32 bits with vncvt.x.x.w before vsoxei32.
define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vsext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV32-NEXT:    vzext.vf8 v24, v16
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
866; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 867; RV32-NEXT: vsext.vf2 v20, v16 868; RV32-NEXT: vsll.vi v16, v20, 3 869; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 870; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 871; RV32-NEXT: ret 872; 873; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64: 874; RV64: # %bb.0: 875; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 876; RV64-NEXT: vsext.vf4 v24, v16 877; RV64-NEXT: vsll.vi v16, v24, 3 878; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 879; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 880; RV64-NEXT: ret 881 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i16> %idxs 882 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 883 ret void 884} 885 886define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 887; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: 888; RV32: # %bb.0: 889; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 890; RV32-NEXT: vsext.vf4 v24, v16 891; RV32-NEXT: vsll.vi v16, v24, 3 892; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 893; RV32-NEXT: vncvt.x.x.w v24, v16 894; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 895; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 896; RV32-NEXT: ret 897; 898; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64: 899; RV64: # %bb.0: 900; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 901; RV64-NEXT: vsext.vf4 v24, v16 902; RV64-NEXT: vsll.vi v16, v24, 3 903; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 904; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 905; RV64-NEXT: ret 906 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 907 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 908 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 909 ret void 910} 911 912define void 
@vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 913; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: 914; RV32: # %bb.0: 915; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 916; RV32-NEXT: vzext.vf4 v24, v16 917; RV32-NEXT: vsll.vi v16, v24, 3 918; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 919; RV32-NEXT: vncvt.x.x.w v24, v16 920; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 921; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 922; RV32-NEXT: ret 923; 924; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64: 925; RV64: # %bb.0: 926; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 927; RV64-NEXT: vzext.vf4 v24, v16 928; RV64-NEXT: vsll.vi v16, v24, 3 929; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 930; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 931; RV64-NEXT: ret 932 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 933 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 934 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 935 ret void 936} 937 938define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 939; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64: 940; RV32: # %bb.0: 941; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 942; RV32-NEXT: vsll.vi v16, v16, 3 943; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 944; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 945; RV32-NEXT: ret 946; 947; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64: 948; RV64: # %bb.0: 949; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 950; RV64-NEXT: vsext.vf2 v24, v16 951; RV64-NEXT: vsll.vi v16, v24, 3 952; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 953; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 954; RV64-NEXT: ret 955 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i32> %idxs 956 call 
void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 957 ret void 958} 959 960define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 961; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: 962; RV32: # %bb.0: 963; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 964; RV32-NEXT: vsext.vf2 v24, v16 965; RV32-NEXT: vsll.vi v16, v24, 3 966; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 967; RV32-NEXT: vncvt.x.x.w v24, v16 968; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 969; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 970; RV32-NEXT: ret 971; 972; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64: 973; RV64: # %bb.0: 974; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 975; RV64-NEXT: vsext.vf2 v24, v16 976; RV64-NEXT: vsll.vi v16, v24, 3 977; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 978; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 979; RV64-NEXT: ret 980 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 981 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 982 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 983 ret void 984} 985 986define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 987; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: 988; RV32: # %bb.0: 989; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 990; RV32-NEXT: vzext.vf2 v24, v16 991; RV32-NEXT: vsll.vi v16, v24, 3 992; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 993; RV32-NEXT: vncvt.x.x.w v24, v16 994; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 995; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 996; RV32-NEXT: ret 997; 998; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64: 999; RV64: # %bb.0: 1000; RV64-NEXT: vsetvli 
a2, zero, e64, m8, ta, mu 1001; RV64-NEXT: vzext.vf2 v24, v16 1002; RV64-NEXT: vsll.vi v16, v24, 3 1003; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1004; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1005; RV64-NEXT: ret 1006 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 1007 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %eidxs 1008 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1009 ret void 1010} 1011 1012define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1013; RV32-LABEL: vpscatter_baseidx_nxv8i64: 1014; RV32: # %bb.0: 1015; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1016; RV32-NEXT: vsll.vi v16, v16, 3 1017; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1018; RV32-NEXT: vncvt.x.x.w v24, v16 1019; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1020; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1021; RV32-NEXT: ret 1022; 1023; RV64-LABEL: vpscatter_baseidx_nxv8i64: 1024; RV64: # %bb.0: 1025; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1026; RV64-NEXT: vsll.vi v16, v16, 3 1027; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1028; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1029; RV64-NEXT: ret 1030 %ptrs = getelementptr inbounds i64, i64* %base, <vscale x 8 x i64> %idxs 1031 call void @llvm.vp.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> %val, <vscale x 8 x i64*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1032 ret void 1033} 1034 1035declare void @llvm.vp.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, <vscale x 1 x i1>, i32) 1036 1037define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1038; RV32-LABEL: vpscatter_nxv1f16: 1039; RV32: # %bb.0: 1040; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, mu 1041; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1042; RV32-NEXT: ret 1043; 1044; 
RV64-LABEL: vpscatter_nxv1f16: 1045; RV64: # %bb.0: 1046; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, mu 1047; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1048; RV64-NEXT: ret 1049 call void @llvm.vp.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> %val, <vscale x 1 x half*> %ptrs, <vscale x 1 x i1> %m, i32 %evl) 1050 ret void 1051} 1052 1053declare void @llvm.vp.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, <vscale x 2 x i1>, i32) 1054 1055define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1056; RV32-LABEL: vpscatter_nxv2f16: 1057; RV32: # %bb.0: 1058; RV32-NEXT: vsetvli zero, a0, e16, mf2, ta, mu 1059; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1060; RV32-NEXT: ret 1061; 1062; RV64-LABEL: vpscatter_nxv2f16: 1063; RV64: # %bb.0: 1064; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, mu 1065; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1066; RV64-NEXT: ret 1067 call void @llvm.vp.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> %val, <vscale x 2 x half*> %ptrs, <vscale x 2 x i1> %m, i32 %evl) 1068 ret void 1069} 1070 1071declare void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, <vscale x 4 x i1>, i32) 1072 1073define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1074; RV32-LABEL: vpscatter_nxv4f16: 1075; RV32: # %bb.0: 1076; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, mu 1077; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1078; RV32-NEXT: ret 1079; 1080; RV64-LABEL: vpscatter_nxv4f16: 1081; RV64: # %bb.0: 1082; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, mu 1083; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1084; RV64-NEXT: ret 1085 call void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %m, i32 %evl) 1086 ret void 1087} 1088 1089define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, 
i32 zeroext %evl) { 1090; RV32-LABEL: vpscatter_truemask_nxv4f16: 1091; RV32: # %bb.0: 1092; RV32-NEXT: vsetvli zero, a0, e16, m1, ta, mu 1093; RV32-NEXT: vsoxei32.v v8, (zero), v10 1094; RV32-NEXT: ret 1095; 1096; RV64-LABEL: vpscatter_truemask_nxv4f16: 1097; RV64: # %bb.0: 1098; RV64-NEXT: vsetvli zero, a0, e16, m1, ta, mu 1099; RV64-NEXT: vsoxei64.v v8, (zero), v12 1100; RV64-NEXT: ret 1101 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1102 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1103 call void @llvm.vp.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> %val, <vscale x 4 x half*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl) 1104 ret void 1105} 1106 1107declare void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, <vscale x 8 x i1>, i32) 1108 1109define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1110; RV32-LABEL: vpscatter_nxv8f16: 1111; RV32: # %bb.0: 1112; RV32-NEXT: vsetvli zero, a0, e16, m2, ta, mu 1113; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1114; RV32-NEXT: ret 1115; 1116; RV64-LABEL: vpscatter_nxv8f16: 1117; RV64: # %bb.0: 1118; RV64-NEXT: vsetvli zero, a0, e16, m2, ta, mu 1119; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1120; RV64-NEXT: ret 1121 call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1122 ret void 1123} 1124 1125define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1126; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16: 1127; RV32: # %bb.0: 1128; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1129; RV32-NEXT: vsext.vf4 v12, v10 1130; RV32-NEXT: vadd.vv v12, v12, v12 1131; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1132; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1133; RV32-NEXT: ret 
1134; 1135; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16: 1136; RV64: # %bb.0: 1137; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1138; RV64-NEXT: vsext.vf8 v16, v10 1139; RV64-NEXT: vadd.vv v16, v16, v16 1140; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1141; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1142; RV64-NEXT: ret 1143 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i8> %idxs 1144 call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1145 ret void 1146} 1147 1148define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1149; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16: 1150; RV32: # %bb.0: 1151; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1152; RV32-NEXT: vsext.vf4 v12, v10 1153; RV32-NEXT: vadd.vv v12, v12, v12 1154; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1155; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1156; RV32-NEXT: ret 1157; 1158; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16: 1159; RV64: # %bb.0: 1160; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1161; RV64-NEXT: vsext.vf8 v16, v10 1162; RV64-NEXT: vadd.vv v16, v16, v16 1163; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1164; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1165; RV64-NEXT: ret 1166 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1167 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1168 call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1169 ret void 1170} 1171 1172define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1173; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: 1174; RV32: # %bb.0: 1175; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1176; RV32-NEXT: 
vzext.vf4 v12, v10 1177; RV32-NEXT: vadd.vv v12, v12, v12 1178; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1179; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1180; RV32-NEXT: ret 1181; 1182; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16: 1183; RV64: # %bb.0: 1184; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1185; RV64-NEXT: vzext.vf8 v16, v10 1186; RV64-NEXT: vadd.vv v16, v16, v16 1187; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1188; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1189; RV64-NEXT: ret 1190 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16> 1191 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %eidxs 1192 call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1193 ret void 1194} 1195 1196define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, half* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1197; RV32-LABEL: vpscatter_baseidx_nxv8f16: 1198; RV32: # %bb.0: 1199; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1200; RV32-NEXT: vsext.vf2 v12, v10 1201; RV32-NEXT: vadd.vv v12, v12, v12 1202; RV32-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1203; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1204; RV32-NEXT: ret 1205; 1206; RV64-LABEL: vpscatter_baseidx_nxv8f16: 1207; RV64: # %bb.0: 1208; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1209; RV64-NEXT: vsext.vf4 v16, v10 1210; RV64-NEXT: vadd.vv v16, v16, v16 1211; RV64-NEXT: vsetvli zero, a1, e16, m2, ta, mu 1212; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1213; RV64-NEXT: ret 1214 %ptrs = getelementptr inbounds half, half* %base, <vscale x 8 x i16> %idxs 1215 call void @llvm.vp.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> %val, <vscale x 8 x half*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1216 ret void 1217} 1218 1219declare void @llvm.vp.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, <vscale x 1 x i1>, i32) 1220 1221define void 
@vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1222; RV32-LABEL: vpscatter_nxv1f32: 1223; RV32: # %bb.0: 1224; RV32-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 1225; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1226; RV32-NEXT: ret 1227; 1228; RV64-LABEL: vpscatter_nxv1f32: 1229; RV64: # %bb.0: 1230; RV64-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 1231; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1232; RV64-NEXT: ret 1233 call void @llvm.vp.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> %val, <vscale x 1 x float*> %ptrs, <vscale x 1 x i1> %m, i32 %evl) 1234 ret void 1235} 1236 1237declare void @llvm.vp.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, <vscale x 2 x i1>, i32) 1238 1239define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1240; RV32-LABEL: vpscatter_nxv2f32: 1241; RV32: # %bb.0: 1242; RV32-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1243; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1244; RV32-NEXT: ret 1245; 1246; RV64-LABEL: vpscatter_nxv2f32: 1247; RV64: # %bb.0: 1248; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1249; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1250; RV64-NEXT: ret 1251 call void @llvm.vp.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> %val, <vscale x 2 x float*> %ptrs, <vscale x 2 x i1> %m, i32 %evl) 1252 ret void 1253} 1254 1255declare void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, <vscale x 4 x i1>, i32) 1256 1257define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1258; RV32-LABEL: vpscatter_nxv4f32: 1259; RV32: # %bb.0: 1260; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1261; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1262; RV32-NEXT: ret 1263; 1264; RV64-LABEL: vpscatter_nxv4f32: 1265; RV64: # %bb.0: 1266; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1267; 
RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1268; RV64-NEXT: ret 1269 call void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %m, i32 %evl) 1270 ret void 1271} 1272 1273define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, i32 zeroext %evl) { 1274; RV32-LABEL: vpscatter_truemask_nxv4f32: 1275; RV32: # %bb.0: 1276; RV32-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1277; RV32-NEXT: vsoxei32.v v8, (zero), v10 1278; RV32-NEXT: ret 1279; 1280; RV64-LABEL: vpscatter_truemask_nxv4f32: 1281; RV64: # %bb.0: 1282; RV64-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1283; RV64-NEXT: vsoxei64.v v8, (zero), v12 1284; RV64-NEXT: ret 1285 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1286 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1287 call void @llvm.vp.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> %val, <vscale x 4 x float*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl) 1288 ret void 1289} 1290 1291declare void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, <vscale x 8 x i1>, i32) 1292 1293define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1294; RV32-LABEL: vpscatter_nxv8f32: 1295; RV32: # %bb.0: 1296; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, mu 1297; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1298; RV32-NEXT: ret 1299; 1300; RV64-LABEL: vpscatter_nxv8f32: 1301; RV64: # %bb.0: 1302; RV64-NEXT: vsetvli zero, a0, e32, m4, ta, mu 1303; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1304; RV64-NEXT: ret 1305 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1306 ret void 1307} 1308 1309define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 
zeroext %evl) { 1310; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32: 1311; RV32: # %bb.0: 1312; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1313; RV32-NEXT: vsext.vf4 v16, v12 1314; RV32-NEXT: vsll.vi v12, v16, 2 1315; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1316; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1317; RV32-NEXT: ret 1318; 1319; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32: 1320; RV64: # %bb.0: 1321; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1322; RV64-NEXT: vsext.vf8 v16, v12 1323; RV64-NEXT: vsll.vi v16, v16, 2 1324; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1325; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1326; RV64-NEXT: ret 1327 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i8> %idxs 1328 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1329 ret void 1330} 1331 1332define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1333; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32: 1334; RV32: # %bb.0: 1335; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1336; RV32-NEXT: vsext.vf4 v16, v12 1337; RV32-NEXT: vsll.vi v12, v16, 2 1338; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1339; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1340; RV32-NEXT: ret 1341; 1342; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32: 1343; RV64: # %bb.0: 1344; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1345; RV64-NEXT: vsext.vf8 v16, v12 1346; RV64-NEXT: vsll.vi v16, v16, 2 1347; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1348; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1349; RV64-NEXT: ret 1350 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1351 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1352 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 
1353 ret void 1354} 1355 1356define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1357; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: 1358; RV32: # %bb.0: 1359; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1360; RV32-NEXT: vzext.vf4 v16, v12 1361; RV32-NEXT: vsll.vi v12, v16, 2 1362; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1363; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1364; RV32-NEXT: ret 1365; 1366; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32: 1367; RV64: # %bb.0: 1368; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1369; RV64-NEXT: vzext.vf8 v16, v12 1370; RV64-NEXT: vsll.vi v16, v16, 2 1371; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1372; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1373; RV64-NEXT: ret 1374 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32> 1375 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1376 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1377 ret void 1378} 1379 1380define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1381; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: 1382; RV32: # %bb.0: 1383; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1384; RV32-NEXT: vsext.vf2 v16, v12 1385; RV32-NEXT: vsll.vi v12, v16, 2 1386; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1387; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1388; RV32-NEXT: ret 1389; 1390; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32: 1391; RV64: # %bb.0: 1392; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1393; RV64-NEXT: vsext.vf4 v16, v12 1394; RV64-NEXT: vsll.vi v16, v16, 2 1395; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1396; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1397; RV64-NEXT: ret 1398 %ptrs = getelementptr inbounds float, 
float* %base, <vscale x 8 x i16> %idxs 1399 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1400 ret void 1401} 1402 1403define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1404; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: 1405; RV32: # %bb.0: 1406; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1407; RV32-NEXT: vsext.vf2 v16, v12 1408; RV32-NEXT: vsll.vi v12, v16, 2 1409; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1410; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1411; RV32-NEXT: ret 1412; 1413; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32: 1414; RV64: # %bb.0: 1415; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1416; RV64-NEXT: vsext.vf4 v16, v12 1417; RV64-NEXT: vsll.vi v16, v16, 2 1418; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1419; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1420; RV64-NEXT: ret 1421 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1422 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1423 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1424 ret void 1425} 1426 1427define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1428; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: 1429; RV32: # %bb.0: 1430; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1431; RV32-NEXT: vzext.vf2 v16, v12 1432; RV32-NEXT: vsll.vi v12, v16, 2 1433; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1434; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1435; RV32-NEXT: ret 1436; 1437; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32: 1438; RV64: # %bb.0: 1439; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1440; RV64-NEXT: vzext.vf4 v16, v12 1441; 
RV64-NEXT: vsll.vi v16, v16, 2 1442; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1443; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1444; RV64-NEXT: ret 1445 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32> 1446 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %eidxs 1447 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1448 ret void 1449} 1450 1451define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, float* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1452; RV32-LABEL: vpscatter_baseidx_nxv8f32: 1453; RV32: # %bb.0: 1454; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1455; RV32-NEXT: vsll.vi v12, v12, 2 1456; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1457; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t 1458; RV32-NEXT: ret 1459; 1460; RV64-LABEL: vpscatter_baseidx_nxv8f32: 1461; RV64: # %bb.0: 1462; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1463; RV64-NEXT: vsext.vf2 v16, v12 1464; RV64-NEXT: vsll.vi v16, v16, 2 1465; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1466; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1467; RV64-NEXT: ret 1468 %ptrs = getelementptr inbounds float, float* %base, <vscale x 8 x i32> %idxs 1469 call void @llvm.vp.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> %val, <vscale x 8 x float*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1470 ret void 1471} 1472 1473declare void @llvm.vp.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, <vscale x 1 x i1>, i32) 1474 1475define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1476; RV32-LABEL: vpscatter_nxv1f64: 1477; RV32: # %bb.0: 1478; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, mu 1479; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t 1480; RV32-NEXT: ret 1481; 1482; RV64-LABEL: vpscatter_nxv1f64: 1483; RV64: # %bb.0: 1484; RV64-NEXT: vsetvli zero, a0, e64, m1, 
ta, mu 1485; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t 1486; RV64-NEXT: ret 1487 call void @llvm.vp.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> %val, <vscale x 1 x double*> %ptrs, <vscale x 1 x i1> %m, i32 %evl) 1488 ret void 1489} 1490 1491declare void @llvm.vp.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, <vscale x 2 x i1>, i32) 1492 1493define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1494; RV32-LABEL: vpscatter_nxv2f64: 1495; RV32: # %bb.0: 1496; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, mu 1497; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t 1498; RV32-NEXT: ret 1499; 1500; RV64-LABEL: vpscatter_nxv2f64: 1501; RV64: # %bb.0: 1502; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, mu 1503; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t 1504; RV64-NEXT: ret 1505 call void @llvm.vp.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %val, <vscale x 2 x double*> %ptrs, <vscale x 2 x i1> %m, i32 %evl) 1506 ret void 1507} 1508 1509declare void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, <vscale x 4 x i1>, i32) 1510 1511define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1512; RV32-LABEL: vpscatter_nxv4f64: 1513; RV32: # %bb.0: 1514; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1515; RV32-NEXT: vsoxei32.v v8, (zero), v12, v0.t 1516; RV32-NEXT: ret 1517; 1518; RV64-LABEL: vpscatter_nxv4f64: 1519; RV64: # %bb.0: 1520; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1521; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t 1522; RV64-NEXT: ret 1523 call void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %m, i32 %evl) 1524 ret void 1525} 1526 1527define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, i32 zeroext %evl) { 1528; RV32-LABEL: vpscatter_truemask_nxv4f64: 
1529; RV32: # %bb.0: 1530; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1531; RV32-NEXT: vsoxei32.v v8, (zero), v12 1532; RV32-NEXT: ret 1533; 1534; RV64-LABEL: vpscatter_truemask_nxv4f64: 1535; RV64: # %bb.0: 1536; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1537; RV64-NEXT: vsoxei64.v v8, (zero), v12 1538; RV64-NEXT: ret 1539 %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0 1540 %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer 1541 call void @llvm.vp.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> %val, <vscale x 4 x double*> %ptrs, <vscale x 4 x i1> %mtrue, i32 %evl) 1542 ret void 1543} 1544 1545declare void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double>, <vscale x 6 x double*>, <vscale x 6 x i1>, i32) 1546 1547define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1548; RV32-LABEL: vpscatter_nxv6f64: 1549; RV32: # %bb.0: 1550; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1551; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t 1552; RV32-NEXT: ret 1553; 1554; RV64-LABEL: vpscatter_nxv6f64: 1555; RV64: # %bb.0: 1556; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1557; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1558; RV64-NEXT: ret 1559 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1560 ret void 1561} 1562 1563define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1564; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64: 1565; RV32: # %bb.0: 1566; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1567; RV32-NEXT: vsext.vf4 v20, v16 1568; RV32-NEXT: vsll.vi v16, v20, 3 1569; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1570; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1571; RV32-NEXT: ret 1572; 1573; RV64-LABEL: 
vpscatter_baseidx_nxv6i8_nxv6f64: 1574; RV64: # %bb.0: 1575; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1576; RV64-NEXT: vsext.vf8 v24, v16 1577; RV64-NEXT: vsll.vi v16, v24, 3 1578; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1579; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1580; RV64-NEXT: ret 1581 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i8> %idxs 1582 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1583 ret void 1584} 1585 1586define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1587; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: 1588; RV32: # %bb.0: 1589; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1590; RV32-NEXT: vsext.vf8 v24, v16 1591; RV32-NEXT: vsll.vi v16, v24, 3 1592; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1593; RV32-NEXT: vncvt.x.x.w v24, v16 1594; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1595; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1596; RV32-NEXT: ret 1597; 1598; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64: 1599; RV64: # %bb.0: 1600; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1601; RV64-NEXT: vsext.vf8 v24, v16 1602; RV64-NEXT: vsll.vi v16, v24, 3 1603; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1604; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1605; RV64-NEXT: ret 1606 %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64> 1607 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1608 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1609 ret void 1610} 1611 1612define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1613; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: 1614; 
RV32: # %bb.0: 1615; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1616; RV32-NEXT: vzext.vf8 v24, v16 1617; RV32-NEXT: vsll.vi v16, v24, 3 1618; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1619; RV32-NEXT: vncvt.x.x.w v24, v16 1620; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1621; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1622; RV32-NEXT: ret 1623; 1624; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64: 1625; RV64: # %bb.0: 1626; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1627; RV64-NEXT: vzext.vf8 v24, v16 1628; RV64-NEXT: vsll.vi v16, v24, 3 1629; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1630; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1631; RV64-NEXT: ret 1632 %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64> 1633 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1634 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1635 ret void 1636} 1637 1638define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1639; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: 1640; RV32: # %bb.0: 1641; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1642; RV32-NEXT: vsext.vf2 v20, v16 1643; RV32-NEXT: vsll.vi v16, v20, 3 1644; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1645; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1646; RV32-NEXT: ret 1647; 1648; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64: 1649; RV64: # %bb.0: 1650; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1651; RV64-NEXT: vsext.vf4 v24, v16 1652; RV64-NEXT: vsll.vi v16, v24, 3 1653; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1654; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1655; RV64-NEXT: ret 1656 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i16> %idxs 1657 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 
6 x i1> %m, i32 %evl) 1658 ret void 1659} 1660 1661define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1662; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: 1663; RV32: # %bb.0: 1664; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1665; RV32-NEXT: vsext.vf4 v24, v16 1666; RV32-NEXT: vsll.vi v16, v24, 3 1667; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1668; RV32-NEXT: vncvt.x.x.w v24, v16 1669; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1670; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1671; RV32-NEXT: ret 1672; 1673; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64: 1674; RV64: # %bb.0: 1675; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1676; RV64-NEXT: vsext.vf4 v24, v16 1677; RV64-NEXT: vsll.vi v16, v24, 3 1678; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1679; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1680; RV64-NEXT: ret 1681 %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64> 1682 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1683 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1684 ret void 1685} 1686 1687define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1688; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: 1689; RV32: # %bb.0: 1690; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1691; RV32-NEXT: vzext.vf4 v24, v16 1692; RV32-NEXT: vsll.vi v16, v24, 3 1693; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1694; RV32-NEXT: vncvt.x.x.w v24, v16 1695; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1696; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1697; RV32-NEXT: ret 1698; 1699; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64: 1700; RV64: # %bb.0: 1701; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1702; 
RV64-NEXT: vzext.vf4 v24, v16 1703; RV64-NEXT: vsll.vi v16, v24, 3 1704; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1705; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1706; RV64-NEXT: ret 1707 %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64> 1708 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1709 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1710 ret void 1711} 1712 1713define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1714; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: 1715; RV32: # %bb.0: 1716; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1717; RV32-NEXT: vsll.vi v16, v16, 3 1718; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1719; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1720; RV32-NEXT: ret 1721; 1722; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64: 1723; RV64: # %bb.0: 1724; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1725; RV64-NEXT: vsext.vf2 v24, v16 1726; RV64-NEXT: vsll.vi v16, v24, 3 1727; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1728; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1729; RV64-NEXT: ret 1730 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i32> %idxs 1731 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1732 ret void 1733} 1734 1735define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1736; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: 1737; RV32: # %bb.0: 1738; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1739; RV32-NEXT: vsext.vf2 v24, v16 1740; RV32-NEXT: vsll.vi v16, v24, 3 1741; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1742; RV32-NEXT: vncvt.x.x.w v24, v16 1743; RV32-NEXT: 
vsetvli zero, zero, e64, m8, ta, mu 1744; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1745; RV32-NEXT: ret 1746; 1747; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64: 1748; RV64: # %bb.0: 1749; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1750; RV64-NEXT: vsext.vf2 v24, v16 1751; RV64-NEXT: vsll.vi v16, v24, 3 1752; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1753; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1754; RV64-NEXT: ret 1755 %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64> 1756 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1757 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1758 ret void 1759} 1760 1761define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1762; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: 1763; RV32: # %bb.0: 1764; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1765; RV32-NEXT: vzext.vf2 v24, v16 1766; RV32-NEXT: vsll.vi v16, v24, 3 1767; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1768; RV32-NEXT: vncvt.x.x.w v24, v16 1769; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1770; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1771; RV32-NEXT: ret 1772; 1773; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64: 1774; RV64: # %bb.0: 1775; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1776; RV64-NEXT: vzext.vf2 v24, v16 1777; RV64-NEXT: vsll.vi v16, v24, 3 1778; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1779; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1780; RV64-NEXT: ret 1781 %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64> 1782 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %eidxs 1783 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1784 ret void 1785} 1786 1787define void 
@vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, double* %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) { 1788; RV32-LABEL: vpscatter_baseidx_nxv6f64: 1789; RV32: # %bb.0: 1790; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1791; RV32-NEXT: vsll.vi v16, v16, 3 1792; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1793; RV32-NEXT: vncvt.x.x.w v24, v16 1794; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1795; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1796; RV32-NEXT: ret 1797; 1798; RV64-LABEL: vpscatter_baseidx_nxv6f64: 1799; RV64: # %bb.0: 1800; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1801; RV64-NEXT: vsll.vi v16, v16, 3 1802; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1803; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1804; RV64-NEXT: ret 1805 %ptrs = getelementptr inbounds double, double* %base, <vscale x 6 x i64> %idxs 1806 call void @llvm.vp.scatter.nxv6f64.nxv6p0f64(<vscale x 6 x double> %val, <vscale x 6 x double*> %ptrs, <vscale x 6 x i1> %m, i32 %evl) 1807 ret void 1808} 1809 1810declare void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, <vscale x 8 x i1>, i32) 1811 1812define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1813; RV32-LABEL: vpscatter_nxv8f64: 1814; RV32: # %bb.0: 1815; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1816; RV32-NEXT: vsoxei32.v v8, (zero), v16, v0.t 1817; RV32-NEXT: ret 1818; 1819; RV64-LABEL: vpscatter_nxv8f64: 1820; RV64: # %bb.0: 1821; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, mu 1822; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 1823; RV64-NEXT: ret 1824 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1825 ret void 1826} 1827 1828define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1829; 
RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64: 1830; RV32: # %bb.0: 1831; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1832; RV32-NEXT: vsext.vf4 v20, v16 1833; RV32-NEXT: vsll.vi v16, v20, 3 1834; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1835; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1836; RV32-NEXT: ret 1837; 1838; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64: 1839; RV64: # %bb.0: 1840; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1841; RV64-NEXT: vsext.vf8 v24, v16 1842; RV64-NEXT: vsll.vi v16, v24, 3 1843; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1844; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1845; RV64-NEXT: ret 1846 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i8> %idxs 1847 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1848 ret void 1849} 1850 1851define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1852; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: 1853; RV32: # %bb.0: 1854; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1855; RV32-NEXT: vsext.vf8 v24, v16 1856; RV32-NEXT: vsll.vi v16, v24, 3 1857; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1858; RV32-NEXT: vncvt.x.x.w v24, v16 1859; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1860; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1861; RV32-NEXT: ret 1862; 1863; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: 1864; RV64: # %bb.0: 1865; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1866; RV64-NEXT: vsext.vf8 v24, v16 1867; RV64-NEXT: vsll.vi v16, v24, 3 1868; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1869; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1870; RV64-NEXT: ret 1871 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1872 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 1873 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 
x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1874 ret void 1875} 1876 1877define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1878; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: 1879; RV32: # %bb.0: 1880; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1881; RV32-NEXT: vzext.vf8 v24, v16 1882; RV32-NEXT: vsll.vi v16, v24, 3 1883; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1884; RV32-NEXT: vncvt.x.x.w v24, v16 1885; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1886; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1887; RV32-NEXT: ret 1888; 1889; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64: 1890; RV64: # %bb.0: 1891; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1892; RV64-NEXT: vzext.vf8 v24, v16 1893; RV64-NEXT: vsll.vi v16, v24, 3 1894; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1895; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1896; RV64-NEXT: ret 1897 %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64> 1898 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 1899 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1900 ret void 1901} 1902 1903define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1904; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64: 1905; RV32: # %bb.0: 1906; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1907; RV32-NEXT: vsext.vf2 v20, v16 1908; RV32-NEXT: vsll.vi v16, v20, 3 1909; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1910; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1911; RV32-NEXT: ret 1912; 1913; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64: 1914; RV64: # %bb.0: 1915; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1916; RV64-NEXT: vsext.vf4 v24, v16 1917; RV64-NEXT: vsll.vi 
v16, v24, 3 1918; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1919; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1920; RV64-NEXT: ret 1921 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i16> %idxs 1922 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1923 ret void 1924} 1925 1926define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1927; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: 1928; RV32: # %bb.0: 1929; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1930; RV32-NEXT: vsext.vf4 v24, v16 1931; RV32-NEXT: vsll.vi v16, v24, 3 1932; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1933; RV32-NEXT: vncvt.x.x.w v24, v16 1934; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1935; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1936; RV32-NEXT: ret 1937; 1938; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: 1939; RV64: # %bb.0: 1940; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1941; RV64-NEXT: vsext.vf4 v24, v16 1942; RV64-NEXT: vsll.vi v16, v24, 3 1943; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1944; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1945; RV64-NEXT: ret 1946 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1947 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 1948 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1949 ret void 1950} 1951 1952define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1953; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: 1954; RV32: # %bb.0: 1955; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1956; RV32-NEXT: vzext.vf4 v24, v16 1957; RV32-NEXT: vsll.vi v16, v24, 3 1958; RV32-NEXT: 
vsetvli zero, a1, e32, m4, ta, mu 1959; RV32-NEXT: vncvt.x.x.w v24, v16 1960; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1961; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 1962; RV32-NEXT: ret 1963; 1964; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64: 1965; RV64: # %bb.0: 1966; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1967; RV64-NEXT: vzext.vf4 v24, v16 1968; RV64-NEXT: vsll.vi v16, v24, 3 1969; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1970; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1971; RV64-NEXT: ret 1972 %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64> 1973 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 1974 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1975 ret void 1976} 1977 1978define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1979; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64: 1980; RV32: # %bb.0: 1981; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, mu 1982; RV32-NEXT: vsll.vi v16, v16, 3 1983; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1984; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t 1985; RV32-NEXT: ret 1986; 1987; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64: 1988; RV64: # %bb.0: 1989; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 1990; RV64-NEXT: vsext.vf2 v24, v16 1991; RV64-NEXT: vsll.vi v16, v24, 3 1992; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 1993; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 1994; RV64-NEXT: ret 1995 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i32> %idxs 1996 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 1997 ret void 1998} 1999 2000define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> 
%m, i32 zeroext %evl) { 2001; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: 2002; RV32: # %bb.0: 2003; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2004; RV32-NEXT: vsext.vf2 v24, v16 2005; RV32-NEXT: vsll.vi v16, v24, 3 2006; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2007; RV32-NEXT: vncvt.x.x.w v24, v16 2008; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2009; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 2010; RV32-NEXT: ret 2011; 2012; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: 2013; RV64: # %bb.0: 2014; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2015; RV64-NEXT: vsext.vf2 v24, v16 2016; RV64-NEXT: vsll.vi v16, v24, 3 2017; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2018; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 2019; RV64-NEXT: ret 2020 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 2021 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2022 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2023 ret void 2024} 2025 2026define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2027; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: 2028; RV32: # %bb.0: 2029; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2030; RV32-NEXT: vzext.vf2 v24, v16 2031; RV32-NEXT: vsll.vi v16, v24, 3 2032; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2033; RV32-NEXT: vncvt.x.x.w v24, v16 2034; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2035; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 2036; RV32-NEXT: ret 2037; 2038; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: 2039; RV64: # %bb.0: 2040; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2041; RV64-NEXT: vzext.vf2 v24, v16 2042; RV64-NEXT: vsll.vi v16, v24, 3 2043; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2044; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 2045; RV64-NEXT: ret 2046 
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64> 2047 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %eidxs 2048 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2049 ret void 2050} 2051 2052define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2053; RV32-LABEL: vpscatter_baseidx_nxv8f64: 2054; RV32: # %bb.0: 2055; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2056; RV32-NEXT: vsll.vi v16, v16, 3 2057; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, mu 2058; RV32-NEXT: vncvt.x.x.w v24, v16 2059; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2060; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 2061; RV32-NEXT: ret 2062; 2063; RV64-LABEL: vpscatter_baseidx_nxv8f64: 2064; RV64: # %bb.0: 2065; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, mu 2066; RV64-NEXT: vsll.vi v16, v16, 3 2067; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2068; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t 2069; RV64-NEXT: ret 2070 %ptrs = getelementptr inbounds double, double* %base, <vscale x 8 x i64> %idxs 2071 call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl) 2072 ret void 2073} 2074 2075declare void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, <vscale x 16 x i1>, i32) 2076 2077define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2078; RV32-LABEL: vpscatter_nxv16f64: 2079; RV32: # %bb.0: 2080; RV32-NEXT: vl8re32.v v24, (a0) 2081; RV32-NEXT: csrr a0, vlenb 2082; RV32-NEXT: mv a2, a1 2083; RV32-NEXT: bltu a1, a0, .LBB95_2 2084; RV32-NEXT: # %bb.1: 2085; RV32-NEXT: mv a2, a0 2086; RV32-NEXT: .LBB95_2: 2087; RV32-NEXT: li a3, 0 2088; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu 2089; RV32-NEXT: 
vsoxei32.v v8, (zero), v24, v0.t 2090; RV32-NEXT: srli a2, a0, 3 2091; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, mu 2092; RV32-NEXT: sub a0, a1, a0 2093; RV32-NEXT: vslidedown.vx v0, v0, a2 2094; RV32-NEXT: bltu a1, a0, .LBB95_4 2095; RV32-NEXT: # %bb.3: 2096; RV32-NEXT: mv a3, a0 2097; RV32-NEXT: .LBB95_4: 2098; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2099; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t 2100; RV32-NEXT: ret 2101; 2102; RV64-LABEL: vpscatter_nxv16f64: 2103; RV64: # %bb.0: 2104; RV64-NEXT: addi sp, sp, -16 2105; RV64-NEXT: .cfi_def_cfa_offset 16 2106; RV64-NEXT: csrr a1, vlenb 2107; RV64-NEXT: slli a1, a1, 3 2108; RV64-NEXT: sub sp, sp, a1 2109; RV64-NEXT: addi a1, sp, 16 2110; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2111; RV64-NEXT: vl8re64.v v16, (a0) 2112; RV64-NEXT: csrr a1, vlenb 2113; RV64-NEXT: slli a3, a1, 3 2114; RV64-NEXT: add a0, a0, a3 2115; RV64-NEXT: mv a3, a2 2116; RV64-NEXT: bltu a2, a1, .LBB95_2 2117; RV64-NEXT: # %bb.1: 2118; RV64-NEXT: mv a3, a1 2119; RV64-NEXT: .LBB95_2: 2120; RV64-NEXT: li a4, 0 2121; RV64-NEXT: vl8re64.v v24, (a0) 2122; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2123; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t 2124; RV64-NEXT: srli a3, a1, 3 2125; RV64-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 2126; RV64-NEXT: sub a0, a2, a1 2127; RV64-NEXT: vslidedown.vx v0, v0, a3 2128; RV64-NEXT: bltu a2, a0, .LBB95_4 2129; RV64-NEXT: # %bb.3: 2130; RV64-NEXT: mv a4, a0 2131; RV64-NEXT: .LBB95_4: 2132; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu 2133; RV64-NEXT: addi a0, sp, 16 2134; RV64-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload 2135; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t 2136; RV64-NEXT: csrr a0, vlenb 2137; RV64-NEXT: slli a0, a0, 3 2138; RV64-NEXT: add sp, sp, a0 2139; RV64-NEXT: addi sp, sp, 16 2140; RV64-NEXT: ret 2141 call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2142 ret void 2143} 2144 
2145define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2146; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: 2147; RV32: # %bb.0: 2148; RV32-NEXT: vl4re16.v v4, (a1) 2149; RV32-NEXT: csrr a1, vlenb 2150; RV32-NEXT: mv a3, a2 2151; RV32-NEXT: bltu a2, a1, .LBB96_2 2152; RV32-NEXT: # %bb.1: 2153; RV32-NEXT: mv a3, a1 2154; RV32-NEXT: .LBB96_2: 2155; RV32-NEXT: li a4, 0 2156; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, mu 2157; RV32-NEXT: vsext.vf2 v24, v4 2158; RV32-NEXT: vsll.vi v24, v24, 3 2159; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2160; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 2161; RV32-NEXT: srli a3, a1, 3 2162; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu 2163; RV32-NEXT: sub a1, a2, a1 2164; RV32-NEXT: vslidedown.vx v0, v0, a3 2165; RV32-NEXT: bltu a2, a1, .LBB96_4 2166; RV32-NEXT: # %bb.3: 2167; RV32-NEXT: mv a4, a1 2168; RV32-NEXT: .LBB96_4: 2169; RV32-NEXT: vsetvli zero, a4, e64, m8, ta, mu 2170; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t 2171; RV32-NEXT: ret 2172; 2173; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: 2174; RV64: # %bb.0: 2175; RV64-NEXT: vl4re16.v v4, (a1) 2176; RV64-NEXT: csrr a1, vlenb 2177; RV64-NEXT: mv a3, a2 2178; RV64-NEXT: bltu a2, a1, .LBB96_2 2179; RV64-NEXT: # %bb.1: 2180; RV64-NEXT: mv a3, a1 2181; RV64-NEXT: .LBB96_2: 2182; RV64-NEXT: li a4, 0 2183; RV64-NEXT: vsetvli a5, zero, e64, m8, ta, mu 2184; RV64-NEXT: vsext.vf4 v24, v4 2185; RV64-NEXT: vsll.vi v24, v24, 3 2186; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, mu 2187; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t 2188; RV64-NEXT: srli a3, a1, 3 2189; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, mu 2190; RV64-NEXT: sub a1, a2, a1 2191; RV64-NEXT: vslidedown.vx v0, v0, a3 2192; RV64-NEXT: bltu a2, a1, .LBB96_4 2193; RV64-NEXT: # %bb.3: 2194; RV64-NEXT: mv a4, a1 2195; RV64-NEXT: .LBB96_4: 2196; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2197; RV64-NEXT: vsext.vf4 
v8, v6 2198; RV64-NEXT: vsll.vi v8, v8, 3 2199; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, mu 2200; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t 2201; RV64-NEXT: ret 2202 %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs 2203 call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl) 2204 ret void 2205} 2206 2207define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2208; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: 2209; RV32: # %bb.0: 2210; RV32-NEXT: addi sp, sp, -16 2211; RV32-NEXT: .cfi_def_cfa_offset 16 2212; RV32-NEXT: csrr a3, vlenb 2213; RV32-NEXT: slli a3, a3, 4 2214; RV32-NEXT: sub sp, sp, a3 2215; RV32-NEXT: vl4re16.v v24, (a1) 2216; RV32-NEXT: csrr a1, vlenb 2217; RV32-NEXT: slli a1, a1, 3 2218; RV32-NEXT: add a1, sp, a1 2219; RV32-NEXT: addi a1, a1, 16 2220; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2221; RV32-NEXT: addi a1, sp, 16 2222; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 2223; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2224; RV32-NEXT: csrr a1, vlenb 2225; RV32-NEXT: vsext.vf4 v8, v24 2226; RV32-NEXT: mv a3, a2 2227; RV32-NEXT: bltu a2, a1, .LBB97_2 2228; RV32-NEXT: # %bb.1: 2229; RV32-NEXT: mv a3, a1 2230; RV32-NEXT: .LBB97_2: 2231; RV32-NEXT: li a4, 0 2232; RV32-NEXT: vsext.vf4 v16, v26 2233; RV32-NEXT: vsll.vi v8, v8, 3 2234; RV32-NEXT: vsetvli zero, a3, e32, m4, ta, mu 2235; RV32-NEXT: vncvt.x.x.w v24, v8 2236; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2237; RV32-NEXT: addi a3, sp, 16 2238; RV32-NEXT: vl8re8.v v8, (a3) # Unknown-size Folded Reload 2239; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t 2240; RV32-NEXT: srli a3, a1, 3 2241; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, mu 2242; RV32-NEXT: sub a1, a2, a1 2243; RV32-NEXT: vslidedown.vx v0, v0, a3 2244; RV32-NEXT: bltu a2, a1, 
.LBB97_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a4, a1
; RV32-NEXT:  .LBB97_4:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    vl4re16.v v24, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vsext.vf4 v8, v24
; RV64-NEXT:    mv a3, a2
; RV64-NEXT:    bltu a2, a1, .LBB97_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a1
; RV64-NEXT:  .LBB97_2:
; RV64-NEXT:    li a4, 0
; RV64-NEXT:    vsext.vf4 v16, v26
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT:    srli a3, a1, 3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT:    sub a1, a2, a1
; RV64-NEXT:    vslidedown.vx v0, v0, a3
; RV64-NEXT:    bltu a2, a1, .LBB97_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a4, a1
; RV64-NEXT:  .LBB97_4:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

; vp.scatter of nxv16f64 with indices zero-extended from i16: the checks below
; show the store being split into two masked vsoxei operations, with the value
; operand spilled/reloaded around the split and the mask advanced via
; vslidedown for the second half.
define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a3, a3, 4
; RV32-NEXT:    sub sp, sp, a3
; RV32-NEXT:    vl4re16.v v24, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vzext.vf4 v8, v24
; RV32-NEXT:    mv a3, a2
; RV32-NEXT:    bltu a2, a1, .LBB98_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a1
; RV32-NEXT:  .LBB98_2:
; RV32-NEXT:    li a4, 0
; RV32-NEXT:    vzext.vf4 v16, v26
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, a3, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v24, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    srli a3, a1, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV32-NEXT:    sub a1, a2, a1
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    bltu a2, a1, .LBB98_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    mv a4, a1
; RV32-NEXT:  .LBB98_4:
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT:    vsll.vi v8, v16, 3
; RV32-NEXT:    vsetvli zero, a4, e32, m4, ta, mu
; RV32-NEXT:    vncvt.x.x.w v16, v8
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    vl4re16.v v24, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vzext.vf4 v8, v24
; RV64-NEXT:    mv a3, a2
; RV64-NEXT:    bltu a2, a1, .LBB98_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a1
; RV64-NEXT:  .LBB98_2:
; RV64-NEXT:    li a4, 0
; RV64-NEXT:    vzext.vf4 v16, v26
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
; RV64-NEXT:    srli a3, a1, 3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
; RV64-NEXT:    sub a1, a2, a1
; RV64-NEXT:    vslidedown.vx v0, v0, a3
; RV64-NEXT:    bltu a2, a1, .LBB98_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a4, a1
; RV64-NEXT:  .LBB98_4:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}