; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 0
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 2
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_v2i32_6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v12, 6
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
  ret <vscale x 8 x i32> %v
}
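
; Note: inserting a fixed-length subvector at a constant offset is expected
; to lower to a load of the subvector followed by a tail-undisturbed (tu)
; vslideup whose VL is the insert index plus the subvector length, so only
; the inserted elements are written.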

define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v12, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v12, 0
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v12, 0
; LMULMAX1-NEXT:    vsetivli zero, 8, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v16, 4
; LMULMAX1-NEXT:    ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, <8 x i32>* %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v12, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v12, 8
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v12, (a0)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vle32.v v16, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 12, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v12, 8
; LMULMAX1-NEXT:    vsetivli zero, 16, e32, m4, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v16, 12
; LMULMAX1-NEXT:    ret
  %sv = load <8 x i32>, <8 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(<2 x i32>* %svp) {
; CHECK-LABEL: insert_nxv8i32_undef_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
  ret <vscale x 8 x i32> %v
}

define void @insert_v4i32_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}
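
; Fixed-length destination vectors are passed through memory here, so these
; tests load the destination, splice the subvector in with a tu vslideup,
; and store the result back.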

define void @insert_v4i32_v2i32_2(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_v2i32_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 2
; CHECK-NEXT:    vse32.v v9, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <4 x i32>, <4 x i32>* %vp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> %vec, <2 x i32> %sv, i64 2)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v4i32_undef_v2i32_0(<4 x i32>* %vp, <2 x i32>* %svp) {
; CHECK-LABEL: insert_v4i32_undef_v2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
  store <4 x i32> %v, <4 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_0(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 0
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 0
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 0)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_v2i32_2(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_2:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 4, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 2
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_2:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 2)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}
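
; With -riscv-v-fixed-length-vector-lmul-max=1, an <8 x i32> is split into
; two m1 halves, so an insert at element 6 should only need to touch the
; second half (note the pointer being advanced by 16 bytes below).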

define void @insert_v8i32_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_v2i32_6:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v10, (a0)
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_v2i32_6:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-NEXT:    vle32.v v9, (a0)
; LMULMAX1-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %vec = load <8 x i32>, <8 x i32>* %vp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> %vec, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v8i32_undef_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
; LMULMAX2-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v10, v8, 6
; LMULMAX2-NEXT:    vse32.v v10, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; LMULMAX1-NEXT:    vle32.v v8, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v9, v8, 2
; LMULMAX1-NEXT:    addi a0, a0, 16
; LMULMAX1-NEXT:    vse32.v v9, (a0)
; LMULMAX1-NEXT:    ret
  %sv = load <2 x i32>, <2 x i32>* %svp
  %v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
  store <8 x i32> %v, <8 x i32>* %vp
  ret void
}

define void @insert_v4i16_v2i16_0(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 0)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}

define void @insert_v4i16_v2i16_2(<4 x i16>* %vp, <2 x i16>* %svp) {
; CHECK-LABEL: insert_v4i16_v2i16_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <4 x i16>, <4 x i16>* %vp
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16> %v, <2 x i16> %sv, i64 2)
  store <4 x i16> %c, <4 x i16>* %vp
  ret void
}
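
; Mask (i1) subvectors whose bit count is a multiple of 8 can be spliced as
; whole bytes of the packed mask: loaded with vlm, slid into place with an
; e8 vslideup, and stored back with vsm.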

define void @insert_v32i1_v8i1_0(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_0:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vlm.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT:    vlm.v v9, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 1, e8, mf4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vsm.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_0:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vlm.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT:    vlm.v v9, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vsm.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 0)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
; LMULMAX2-LABEL: insert_v32i1_v8i1_16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vlm.v v8, (a0)
; LMULMAX2-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX2-NEXT:    vlm.v v9, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 3, e8, mf4, tu, mu
; LMULMAX2-NEXT:    vslideup.vi v8, v9, 2
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vsm.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: insert_v32i1_v8i1_16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    addi a0, a0, 2
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vlm.v v8, (a0)
; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; LMULMAX1-NEXT:    vlm.v v9, (a1)
; LMULMAX1-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; LMULMAX1-NEXT:    vslideup.vi v8, v9, 0
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-NEXT:    vsm.v v8, (a0)
; LMULMAX1-NEXT:    ret
  %v = load <32 x i1>, <32 x i1>* %vp
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
  store <32 x i1> %c, <32 x i1>* %vp
  ret void
}

define void @insert_v8i1_v4i1_0(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmsne.vi v8, v9, 0
; CHECK-NEXT:    vsm.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 0)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}
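
; A <4 x i1> subvector is not byte-sized, so the masks are widened to i8
; vectors with vmerge.vim, spliced with vslideup, and narrowed back to a
; mask with vmsne.vi.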

define void @insert_v8i1_v4i1_4(<8 x i1>* %vp, <4 x i1>* %svp) {
; CHECK-LABEL: insert_v8i1_v4i1_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v0, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 4
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vmsne.vi v8, v9, 0
; CHECK-NEXT:    vsm.v v8, (a0)
; CHECK-NEXT:    ret
  %v = load <8 x i1>, <8 x i1>* %vp
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1> %v, <4 x i1> %sv, i64 4)
  store <8 x i1> %c, <8 x i1>* %vp
  ret void
}

define <vscale x 2 x i16> @insert_nxv2i16_v2i16_0(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 0
; CHECK-NEXT:    ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 0)
  ret <vscale x 2 x i16> %c
}

define <vscale x 2 x i16> @insert_nxv2i16_v2i16_4(<vscale x 2 x i16> %v, <2 x i16>* %svp) {
; CHECK-LABEL: insert_nxv2i16_v2i16_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 6, e16, mf2, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    ret
  %sv = load <2 x i16>, <2 x i16>* %svp
  %c = call <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16> %v, <2 x i16> %sv, i64 4)
  ret <vscale x 2 x i16> %c
}

define <vscale x 2 x i1> @insert_nxv2i1_v4i1_0(<vscale x 2 x i1> %v, <4 x i1>* %svp) {
; CHECK-LABEL: insert_nxv2i1_v4i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, mu
; CHECK-NEXT:    vslideup.vi v9, v8, 0
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vmsne.vi v0, v9, 0
; CHECK-NEXT:    ret
  %sv = load <4 x i1>, <4 x i1>* %svp
  %c = call <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1> %v, <4 x i1> %sv, i64 0)
  ret <vscale x 2 x i1> %c
}

define <vscale x 8 x i1> @insert_nxv8i1_v8i1_0(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v8i1_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, tu, mu
; CHECK-NEXT:    vslideup.vi v0, v8, 0
; CHECK-NEXT:    ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 0)
  ret <vscale x 8 x i1> %c
}
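
; An insert at bit offset 16 of a packed scalable mask is byte-aligned:
; 16 bits is 2 bytes, so this should become an e8 vslideup of the mask
; register by 2 with a VL of 3.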

define <vscale x 8 x i1> @insert_nxv8i1_v8i1_16(<vscale x 8 x i1> %v, <8 x i1>* %svp) {
; CHECK-LABEL: insert_nxv8i1_v8i1_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT:    vlm.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 3, e8, mf8, tu, mu
; CHECK-NEXT:    vslideup.vi v0, v8, 2
; CHECK-NEXT:    ret
  %sv = load <8 x i1>, <8 x i1>* %svp
  %c = call <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1> %v, <8 x i1> %sv, i64 16)
  ret <vscale x 8 x i1> %c
}

declare <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)

define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v16, (a1)
; CHECK-NEXT:    vsetivli zero, 6, e64, m8, tu, mu
; CHECK-NEXT:    vslideup.vi v8, v16, 4
; CHECK-NEXT:    vs8r.v v8, (a2)
; CHECK-NEXT:    ret
  %sv0 = load <2 x i64>, <2 x i64>* %psv0
  %sv1 = load <2 x i64>, <2 x i64>* %psv1
  %v0 = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vs8r.v v8, (a1)
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e64, m8, tu, mu
; CHECK-NEXT:    vslideup.vi v16, v8, 2
; CHECK-NEXT:    vs8r.v v16, (a1)
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}
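
; The lo0/lo2 cases above land entirely within the first vector register
; group, so an in-register slideup (if needed) plus a single whole-register
; store suffices.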

; Check we don't mistakenly optimize this: we don't know whether this is
; inserted into the low or high split vector.
define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -64
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    addi s0, sp, 64
; CHECK-NEXT:    .cfi_def_cfa s0, 0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 128
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    addi a2, sp, 64
; CHECK-NEXT:    add a3, a2, a0
; CHECK-NEXT:    vl8re64.v v8, (a3)
; CHECK-NEXT:    vl8re64.v v16, (a2)
; CHECK-NEXT:    add a0, a1, a0
; CHECK-NEXT:    vs8r.v v8, (a0)
; CHECK-NEXT:    vs8r.v v16, (a1)
; CHECK-NEXT:    addi sp, s0, -64
; CHECK-NEXT:    addi sp, sp, 64
; CHECK-NEXT:    ret
  %sv = load <2 x i64>, <2 x i64>* %psv
  %v = call <vscale x 16 x i64> @llvm.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
  ret void
}

declare <8 x i1> @llvm.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)

declare <4 x i16> @llvm.vector.insert.v2i16.v4i16(<4 x i16>, <2 x i16>, i64)

declare <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32>, <2 x i32>, i64)
declare <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32>, <2 x i32>, i64)

declare <vscale x 2 x i1> @llvm.vector.insert.v4i1.nxv2i1(<vscale x 2 x i1>, <4 x i1>, i64)
declare <vscale x 8 x i1> @llvm.vector.insert.v8i1.nxv8i1(<vscale x 8 x i1>, <8 x i1>, i64)

declare <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>, <2 x i16>, i64)

declare <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)