; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
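
; This file checks the lowering of llvm.ctpop.* for scalable vectors: the
; classic SWAR bit-counting sequence, applied per element. As a rough scalar
; sketch (paraphrased here for readability, not taken from the compiler
; source), for an element x of bit width BW:
;   x = x - ((x >> 1) & 0x55...55);                // 2-bit partial sums
;   x = (x & 0x33...33) + ((x >> 2) & 0x33...33);  // 4-bit partial sums
;   x = (x + (x >> 4)) & 0x0F...0F;                // 8-bit partial sums
;   x = (x * 0x01...01) >> (BW - 8);               // sum the bytes (BW > 8)
; For i8 elements the masks are small (85 = 0x55, 51 = 0x33), the last step
; is a single mask with 15, and no multiply is needed.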

define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: ctpop_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @ctpop_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: ctpop_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @ctpop_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: ctpop_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @ctpop_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: ctpop_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @ctpop_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: ctpop_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @ctpop_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: ctpop_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: ctpop_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>)
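
; For i16 elements the masks (0x5555, 0x3333, 0x0f0f) no longer fit in an
; immediate, so they are materialized with lui+addi (addiw on RV64, which
; sign-extends the 32-bit result; this is why the wider types need separate
; RV32/RV64 prefixes while i8 shares CHECK). The byte sum becomes a multiply
; by 0x0101 (257) followed by a logical shift right of 8.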

define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) {
; RV32-LABEL: ctpop_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) {
; RV32-LABEL: ctpop_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) {
; RV32-LABEL: ctpop_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) {
; RV32-LABEL: ctpop_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) {
; RV32-LABEL: ctpop_nxv16i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv16i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: ctpop_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>)
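
; The i32 cases use the same sequence with 32-bit masks (0x55555555,
; 0x33333333, 0x0f0f0f0f), a multiply by 0x01010101, and a final shift
; right of 24.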

define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) {
; RV32-LABEL: ctpop_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) {
; RV32-LABEL: ctpop_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) {
; RV32-LABEL: ctpop_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) {
; RV32-LABEL: ctpop_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: ctpop_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
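
; For i64 elements the splat constants cannot be built in a 32-bit scalar
; register: the RV32 lowering builds each 64-bit mask on the stack from two
; copies of its 32-bit pattern and splats it with a zero-stride vlse64,
; while the RV64 lowering loads the four 64-bit masks from the constant
; pool (.LCPI*_0 through .LCPI*_3). Both finish with a shift right of 56 to
; move the byte sum into the low byte.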

define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: ctpop_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 1
; RV32-NEXT:    vand.vv v9, v11, v9
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v11
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI18_1)
; RV64-NEXT:    ld a1, %lo(.LCPI18_1)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI18_3)
; RV64-NEXT:    ld a1, %lo(.LCPI18_3)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: ctpop_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 1
; RV32-NEXT:    vand.vv v10, v14, v10
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v14
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI19_1)
; RV64-NEXT:    ld a1, %lo(.LCPI19_1)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI19_3)
; RV64-NEXT:    ld a1, %lo(.LCPI19_3)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: ctpop_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 1
; RV32-NEXT:    vand.vv v12, v20, v12
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v20
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v16
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI20_1)
; RV64-NEXT:    ld a1, %lo(.LCPI20_1)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI20_3)
; RV64-NEXT:    ld a1, %lo(.LCPI20_3)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctpop_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI21_1)
; RV64-NEXT:    ld a1, %lo(.LCPI21_1)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI21_3)
; RV64-NEXT:    ld a1, %lo(.LCPI21_3)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)