; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Codegen test for the cttz (count trailing zeros) SDNode on RISC-V scalable
; vectors, comparing the bit-twiddling fallback (zve64x) against the
; float-conversion lowering (+v,+d). Do not hand-edit CHECK lines; regenerate
; with update_llc_test_checks.py.
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64

define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv1i8:
; CHECK-ZVE64X: # %bb.0:
; CHECK-ZVE64X-NEXT: li a0, 1
; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: li a0, 85
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: li a0, 51
; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv1i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT: vzext.vf4 v10, v9
; CHECK-D-NEXT: vfcvt.f.xu.v v9, v10
; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v9
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: vsub.vx v8, v9, a0
; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8>, i1)

define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv2i8:
; CHECK-ZVE64X: # %bb.0:
; CHECK-ZVE64X-NEXT: li a0, 1
; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: li a0, 85
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: li a0, 51
; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv2i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT: vzext.vf4 v10, v9
; CHECK-D-NEXT: vfcvt.f.xu.v v9, v10
; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-D-NEXT: vnsrl.wi v9, v9, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v9
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: vsub.vx v8, v9, a0
; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8>, i1)

define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv4i8:
; CHECK-ZVE64X: # %bb.0:
; CHECK-ZVE64X-NEXT: li a0, 1
; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: li a0, 85
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: li a0, 51
; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv4i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT: vzext.vf4 v10, v9
; CHECK-D-NEXT: vfcvt.f.xu.v v10, v10
; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v9
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: vsub.vx v8, v9, a0
; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8>, i1)

define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv8i8:
; CHECK-ZVE64X: # %bb.0:
; CHECK-ZVE64X-NEXT: li a0, 1
; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, mu
; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT: li a0, 85
; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: li a0, 51
; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv8i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT: vzext.vf4 v12, v9
; CHECK-D-NEXT: vfcvt.f.xu.v v12, v12
; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v10
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: vsub.vx v8, v9, a0
; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8>, i1)

define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: cttz_nxv16i8:
; CHECK-ZVE64X: # %bb.0:
; CHECK-ZVE64X-NEXT: li a0, 1
; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, mu
; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0
; CHECK-ZVE64X-NEXT: vnot.v v8, v8
; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10
; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT: li a0, 85
; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10
; CHECK-ZVE64X-NEXT: li a0, 51
; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0
; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8
; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10
; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv16i8:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, mu
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: vand.vv v10, v8, v10
; CHECK-D-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-D-NEXT: vzext.vf4 v16, v10
; CHECK-D-NEXT: vfcvt.f.xu.v v16, v16
; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v10, v12
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: vsub.vx v8, v10, a0
; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8>, i1)

define <vscale x 32 x i8> @cttz_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: cttz_nxv32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu
; CHECK-NEXT: vsub.vx v12, v8, a0
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vand.vv v8, v8, v12
; CHECK-NEXT: vsrl.vi v12, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v12, v12, a0
; CHECK-NEXT: vsub.vv v8, v8, v12
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v12, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v12, v8
; CHECK-NEXT: vsrl.vi v12, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %a = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8>, i1)

define <vscale x 64 x i8> @cttz_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: cttz_nxv64i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu
; CHECK-NEXT: vsub.vx v16, v8, a0
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vand.vv v8, v8, v16
; CHECK-NEXT: vsrl.vi v16, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vand.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vand.vx v16, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: vsrl.vi v16, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v16
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %a = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8>, i1)

define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: cttz_nxv1i16:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, mu
; RV32I-NEXT: vsub.vx v9, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v9
; RV32I-NEXT: vsrl.vi v9, v8, 1
; RV32I-NEXT: lui a0, 5
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v9, v9, a0
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v9, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: li a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 8
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv1i16:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, mu
; RV64I-NEXT: vsub.vx v9, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: lui a0, 5
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v9, v9, a0
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v9, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: li a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv1i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16>, i1)

define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: cttz_nxv2i16:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
; RV32I-NEXT: vsub.vx v9, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v9
; RV32I-NEXT: vsrl.vi v9, v8, 1
; RV32I-NEXT: lui a0, 5
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v9, v9, a0
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v9, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: li a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 8
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv2i16:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
; RV64I-NEXT: vsub.vx v9, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: lui a0, 5
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v9, v9, a0
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v9, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: li a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv2i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16>, i1)

define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: cttz_nxv4i16:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; RV32I-NEXT: vsub.vx v9, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v9
; RV32I-NEXT: vsrl.vi v9, v8, 1
; RV32I-NEXT: lui a0, 5
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v9, v9, a0
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v9, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: li a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 8
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv4i16:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; RV64I-NEXT: vsub.vx v9, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: lui a0, 5
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v9, v9, a0
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v9, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: li a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv4i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: vnsrl.wi v9, v10, 23
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16>, i1)

define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: cttz_nxv8i16:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; RV32I-NEXT: vsub.vx v10, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsrl.vi v10, v8, 1
; RV32I-NEXT: lui a0, 5
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v10, v10, a0
; RV32I-NEXT: vsub.vv v8, v8, v10
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v10, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v10, v8
; RV32I-NEXT: vsrl.vi v10, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v10
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: li a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 8
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv8i16:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; RV64I-NEXT: vsub.vx v10, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v10
; RV64I-NEXT: vsrl.vi v10, v8, 1
; RV64I-NEXT: lui a0, 5
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v10, v10, a0
; RV64I-NEXT: vsub.vv v8, v8, v10
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v10, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v10, v8
; RV64I-NEXT: vsrl.vi v10, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v10
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: li a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv8i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: vand.vv v10, v8, v10
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
; CHECK-D-NEXT: vnsrl.wi v10, v12, 23
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v10, v10, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16>, i1)

define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: cttz_nxv16i16:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, mu
; RV32I-NEXT: vsub.vx v12, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsrl.vi v12, v8, 1
; RV32I-NEXT: lui a0, 5
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v12, v12, a0
; RV32I-NEXT: vsub.vv v8, v8, v12
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v12, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v12, v8
; RV32I-NEXT: vsrl.vi v12, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v12
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: li a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 8
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv16i16:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, mu
; RV64I-NEXT: vsub.vx v12, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v12
; RV64I-NEXT: vsrl.vi v12, v8, 1
; RV64I-NEXT: lui a0, 5
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v12, v12, a0
; RV64I-NEXT: vsub.vv v8, v8, v12
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v12, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v12, v8
; RV64I-NEXT: vsrl.vi v12, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v12
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: li a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 8
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv16i16:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT: vrsub.vi v12, v8, 0
; CHECK-D-NEXT: vand.vv v12, v8, v12
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
; CHECK-D-NEXT: vnsrl.wi v12, v16, 23
; CHECK-D-NEXT: li a0, 127
; CHECK-D-NEXT: vsub.vx v12, v12, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 16
; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16>, i1)

define <vscale x 32 x i16> @cttz_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: cttz_nxv32i16:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 1
; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, mu
; RV32-NEXT: vsub.vx v16, v8, a0
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a0, 5
; RV32-NEXT: addi a0, a0, 1365
; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vsub.vv v8, v8, v16
; RV32-NEXT: lui a0, 3
; RV32-NEXT: addi a0, a0, 819
; RV32-NEXT: vand.vx v16, v8, a0
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vx v8, v8, a0
; RV32-NEXT: vadd.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: lui a0, 1
; RV32-NEXT: addi a0, a0, -241
; RV32-NEXT: vand.vx v8, v8, a0
; RV32-NEXT: li a0, 257
; RV32-NEXT: vmul.vx v8, v8, a0
; RV32-NEXT: vsrl.vi v8, v8, 8
; RV32-NEXT: ret
;
; RV64-LABEL: cttz_nxv32i16:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 1
; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, mu
; RV64-NEXT: vsub.vx v16, v8, a0
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vand.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 5
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: lui a0, 3
; RV64-NEXT: addiw a0, a0, 819
; RV64-NEXT: vand.vx v16, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: lui a0, 1
; RV64-NEXT: addiw a0, a0, -241
; RV64-NEXT: vand.vx v8, v8, a0
; RV64-NEXT: li a0, 257
; RV64-NEXT: vmul.vx v8, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 8
; RV64-NEXT: ret
  %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16>, i1)

define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: cttz_nxv1i32:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
; RV32I-NEXT: vsub.vx v9, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v9
; RV32I-NEXT: vsrl.vi v9, v8, 1
; RV32I-NEXT: lui a0, 349525
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v9, v9, a0
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: lui a0, 209715
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v9, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: lui a0, 61681
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: lui a0, 4112
; RV32I-NEXT: addi a0, a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 24
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv1i32:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
; RV64I-NEXT: vsub.vx v9, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: lui a0, 349525
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v9, v9, a0
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: lui a0, 209715
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v9, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: lui a0, 61681
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: lui a0, 4112
; RV64I-NEXT: addiw a0, a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv1i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT: vsrl.vx v9, v10, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v9
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32>, i1)

define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: cttz_nxv2i32:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, mu
; RV32I-NEXT: vsub.vx v9, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v9
; RV32I-NEXT: vsrl.vi v9, v8, 1
; RV32I-NEXT: lui a0, 349525
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v9, v9, a0
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: lui a0, 209715
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v9, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: lui a0, 61681
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: lui a0, 4112
; RV32I-NEXT: addi a0, a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 24
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv2i32:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e32, m1, ta, mu
; RV64I-NEXT: vsub.vx v9, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: lui a0, 349525
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v9, v9, a0
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: lui a0, 209715
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v9, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: lui a0, 61681
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: lui a0, 4112
; RV64I-NEXT: addiw a0, a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv2i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT: vrsub.vi v9, v8, 0
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT: vsrl.vx v10, v10, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v9, v10
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32>, i1)

define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
; RV32I-LABEL: cttz_nxv4i32:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, mu
; RV32I-NEXT: vsub.vx v10, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsrl.vi v10, v8, 1
; RV32I-NEXT: lui a0, 349525
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v10, v10, a0
; RV32I-NEXT: vsub.vv v8, v8, v10
; RV32I-NEXT: lui a0, 209715
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v10, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v10, v8
; RV32I-NEXT: vsrl.vi v10, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v10
; RV32I-NEXT: lui a0, 61681
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: lui a0, 4112
; RV32I-NEXT: addi a0, a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 24
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv4i32:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, mu
; RV64I-NEXT: vsub.vx v10, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v10
; RV64I-NEXT: vsrl.vi v10, v8, 1
; RV64I-NEXT: lui a0, 349525
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v10, v10, a0
; RV64I-NEXT: vsub.vv v8, v8, v10
; RV64I-NEXT: lui a0, 209715
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v10, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v10, v8
; RV64I-NEXT: vsrl.vi v10, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v10
; RV64I-NEXT: lui a0, 61681
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: lui a0, 4112
; RV64I-NEXT: addiw a0, a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv4i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT: vrsub.vi v10, v8, 0
; CHECK-D-NEXT: vand.vv v10, v8, v10
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-D-NEXT: vsrl.vx v12, v12, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v10, v12
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v10, v10, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)

define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
; RV32I-LABEL: cttz_nxv8i32:
; RV32I: # %bb.0:
; RV32I-NEXT: li a0, 1
; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV32I-NEXT: vsub.vx v12, v8, a0
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsrl.vi v12, v8, 1
; RV32I-NEXT: lui a0, 349525
; RV32I-NEXT: addi a0, a0, 1365
; RV32I-NEXT: vand.vx v12, v12, a0
; RV32I-NEXT: vsub.vv v8, v8, v12
; RV32I-NEXT: lui a0, 209715
; RV32I-NEXT: addi a0, a0, 819
; RV32I-NEXT: vand.vx v12, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: vadd.vv v8, v12, v8
; RV32I-NEXT: vsrl.vi v12, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v12
; RV32I-NEXT: lui a0, 61681
; RV32I-NEXT: addi a0, a0, -241
; RV32I-NEXT: vand.vx v8, v8, a0
; RV32I-NEXT: lui a0, 4112
; RV32I-NEXT: addi a0, a0, 257
; RV32I-NEXT: vmul.vx v8, v8, a0
; RV32I-NEXT: vsrl.vi v8, v8, 24
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_nxv8i32:
; RV64I: # %bb.0:
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, mu
; RV64I-NEXT: vsub.vx v12, v8, a0
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v12
; RV64I-NEXT: vsrl.vi v12, v8, 1
; RV64I-NEXT: lui a0, 349525
; RV64I-NEXT: addiw a0, a0, 1365
; RV64I-NEXT: vand.vx v12, v12, a0
; RV64I-NEXT: vsub.vv v8, v8, v12
; RV64I-NEXT: lui a0, 209715
; RV64I-NEXT: addiw a0, a0, 819
; RV64I-NEXT: vand.vx v12, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: vadd.vv v8, v12, v8
; RV64I-NEXT: vsrl.vi v12, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v12
; RV64I-NEXT: lui a0, 61681
; RV64I-NEXT: addiw a0, a0, -241
; RV64I-NEXT: vand.vx v8, v8, a0
; RV64I-NEXT: lui a0, 4112
; RV64I-NEXT: addiw a0, a0, 257
; RV64I-NEXT: vmul.vx v8, v8, a0
; RV64I-NEXT: vsrl.vi v8, v8, 24
; RV64I-NEXT: ret
;
; CHECK-D-LABEL: cttz_nxv8i32:
; CHECK-D: # %bb.0:
; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT: vrsub.vi v12, v8, 0
; CHECK-D-NEXT: vand.vv v12, v8, v12
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
; CHECK-D-NEXT: li a0, 52
; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-D-NEXT: vsrl.vx v16, v16, a0
; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT: vncvt.x.x.w v12, v16
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v12, v12, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT: ret
  %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32>, i1)

define <vscale x 16 x i32> @cttz_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: cttz_nxv16i32:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 1
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu
; RV32-NEXT: vsub.vx v16, v8, a0
; RV32-NEXT: vnot.v v8, v8
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a0, 349525
; RV32-NEXT: addi a0, a0, 1365
; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vsub.vv v8, v8, v16
; RV32-NEXT: lui a0, 209715
; RV32-NEXT: addi a0, a0, 819
; RV32-NEXT: vand.vx v16, v8, a0
1055; RV32-NEXT: vsrl.vi v8, v8, 2 1056; RV32-NEXT: vand.vx v8, v8, a0 1057; RV32-NEXT: vadd.vv v8, v16, v8 1058; RV32-NEXT: vsrl.vi v16, v8, 4 1059; RV32-NEXT: vadd.vv v8, v8, v16 1060; RV32-NEXT: lui a0, 61681 1061; RV32-NEXT: addi a0, a0, -241 1062; RV32-NEXT: vand.vx v8, v8, a0 1063; RV32-NEXT: lui a0, 4112 1064; RV32-NEXT: addi a0, a0, 257 1065; RV32-NEXT: vmul.vx v8, v8, a0 1066; RV32-NEXT: vsrl.vi v8, v8, 24 1067; RV32-NEXT: ret 1068; 1069; RV64-LABEL: cttz_nxv16i32: 1070; RV64: # %bb.0: 1071; RV64-NEXT: li a0, 1 1072; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, mu 1073; RV64-NEXT: vsub.vx v16, v8, a0 1074; RV64-NEXT: vnot.v v8, v8 1075; RV64-NEXT: vand.vv v8, v8, v16 1076; RV64-NEXT: vsrl.vi v16, v8, 1 1077; RV64-NEXT: lui a0, 349525 1078; RV64-NEXT: addiw a0, a0, 1365 1079; RV64-NEXT: vand.vx v16, v16, a0 1080; RV64-NEXT: vsub.vv v8, v8, v16 1081; RV64-NEXT: lui a0, 209715 1082; RV64-NEXT: addiw a0, a0, 819 1083; RV64-NEXT: vand.vx v16, v8, a0 1084; RV64-NEXT: vsrl.vi v8, v8, 2 1085; RV64-NEXT: vand.vx v8, v8, a0 1086; RV64-NEXT: vadd.vv v8, v16, v8 1087; RV64-NEXT: vsrl.vi v16, v8, 4 1088; RV64-NEXT: vadd.vv v8, v8, v16 1089; RV64-NEXT: lui a0, 61681 1090; RV64-NEXT: addiw a0, a0, -241 1091; RV64-NEXT: vand.vx v8, v8, a0 1092; RV64-NEXT: lui a0, 4112 1093; RV64-NEXT: addiw a0, a0, 257 1094; RV64-NEXT: vmul.vx v8, v8, a0 1095; RV64-NEXT: vsrl.vi v8, v8, 24 1096; RV64-NEXT: ret 1097 %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false) 1098 ret <vscale x 16 x i32> %a 1099} 1100declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1) 1101 1102define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) { 1103; RV32-LABEL: cttz_nxv1i64: 1104; RV32: # %bb.0: 1105; RV32-NEXT: addi sp, sp, -16 1106; RV32-NEXT: .cfi_def_cfa_offset 16 1107; RV32-NEXT: lui a0, 349525 1108; RV32-NEXT: addi a0, a0, 1365 1109; RV32-NEXT: sw a0, 12(sp) 1110; RV32-NEXT: sw a0, 8(sp) 1111; RV32-NEXT: lui a0, 209715 1112; RV32-NEXT: addi a0, 
a0, 819 1113; RV32-NEXT: sw a0, 12(sp) 1114; RV32-NEXT: sw a0, 8(sp) 1115; RV32-NEXT: lui a0, 61681 1116; RV32-NEXT: addi a0, a0, -241 1117; RV32-NEXT: sw a0, 12(sp) 1118; RV32-NEXT: sw a0, 8(sp) 1119; RV32-NEXT: lui a0, 4112 1120; RV32-NEXT: addi a0, a0, 257 1121; RV32-NEXT: sw a0, 12(sp) 1122; RV32-NEXT: sw a0, 8(sp) 1123; RV32-NEXT: li a0, 1 1124; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, mu 1125; RV32-NEXT: vsub.vx v9, v8, a0 1126; RV32-NEXT: vnot.v v8, v8 1127; RV32-NEXT: addi a0, sp, 8 1128; RV32-NEXT: vlse64.v v10, (a0), zero 1129; RV32-NEXT: vand.vv v8, v8, v9 1130; RV32-NEXT: vlse64.v v9, (a0), zero 1131; RV32-NEXT: vsrl.vi v11, v8, 1 1132; RV32-NEXT: vand.vv v10, v11, v10 1133; RV32-NEXT: vsub.vv v8, v8, v10 1134; RV32-NEXT: vand.vv v10, v8, v9 1135; RV32-NEXT: vsrl.vi v8, v8, 2 1136; RV32-NEXT: vand.vv v8, v8, v9 1137; RV32-NEXT: vadd.vv v8, v10, v8 1138; RV32-NEXT: vlse64.v v9, (a0), zero 1139; RV32-NEXT: vlse64.v v10, (a0), zero 1140; RV32-NEXT: vsrl.vi v11, v8, 4 1141; RV32-NEXT: vadd.vv v8, v8, v11 1142; RV32-NEXT: vand.vv v8, v8, v9 1143; RV32-NEXT: vmul.vv v8, v8, v10 1144; RV32-NEXT: li a0, 56 1145; RV32-NEXT: vsrl.vx v8, v8, a0 1146; RV32-NEXT: addi sp, sp, 16 1147; RV32-NEXT: ret 1148; 1149; RV64-LABEL: cttz_nxv1i64: 1150; RV64: # %bb.0: 1151; RV64-NEXT: li a0, 1 1152; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu 1153; RV64-NEXT: vsub.vx v9, v8, a0 1154; RV64-NEXT: vnot.v v8, v8 1155; RV64-NEXT: vand.vv v8, v8, v9 1156; RV64-NEXT: lui a0, %hi(.LCPI18_0) 1157; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) 1158; RV64-NEXT: lui a1, %hi(.LCPI18_1) 1159; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) 1160; RV64-NEXT: vsrl.vi v9, v8, 1 1161; RV64-NEXT: vand.vx v9, v9, a0 1162; RV64-NEXT: vsub.vv v8, v8, v9 1163; RV64-NEXT: vand.vx v9, v8, a1 1164; RV64-NEXT: vsrl.vi v8, v8, 2 1165; RV64-NEXT: vand.vx v8, v8, a1 1166; RV64-NEXT: vadd.vv v8, v9, v8 1167; RV64-NEXT: lui a0, %hi(.LCPI18_2) 1168; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) 1169; RV64-NEXT: lui a1, %hi(.LCPI18_3) 
1170; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) 1171; RV64-NEXT: vsrl.vi v9, v8, 4 1172; RV64-NEXT: vadd.vv v8, v8, v9 1173; RV64-NEXT: vand.vx v8, v8, a0 1174; RV64-NEXT: vmul.vx v8, v8, a1 1175; RV64-NEXT: li a0, 56 1176; RV64-NEXT: vsrl.vx v8, v8, a0 1177; RV64-NEXT: ret 1178 %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false) 1179 ret <vscale x 1 x i64> %a 1180} 1181declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1) 1182 1183define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) { 1184; RV32-LABEL: cttz_nxv2i64: 1185; RV32: # %bb.0: 1186; RV32-NEXT: addi sp, sp, -16 1187; RV32-NEXT: .cfi_def_cfa_offset 16 1188; RV32-NEXT: lui a0, 349525 1189; RV32-NEXT: addi a0, a0, 1365 1190; RV32-NEXT: sw a0, 12(sp) 1191; RV32-NEXT: sw a0, 8(sp) 1192; RV32-NEXT: lui a0, 209715 1193; RV32-NEXT: addi a0, a0, 819 1194; RV32-NEXT: sw a0, 12(sp) 1195; RV32-NEXT: sw a0, 8(sp) 1196; RV32-NEXT: lui a0, 61681 1197; RV32-NEXT: addi a0, a0, -241 1198; RV32-NEXT: sw a0, 12(sp) 1199; RV32-NEXT: sw a0, 8(sp) 1200; RV32-NEXT: lui a0, 4112 1201; RV32-NEXT: addi a0, a0, 257 1202; RV32-NEXT: sw a0, 12(sp) 1203; RV32-NEXT: sw a0, 8(sp) 1204; RV32-NEXT: li a0, 1 1205; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu 1206; RV32-NEXT: vsub.vx v10, v8, a0 1207; RV32-NEXT: vnot.v v8, v8 1208; RV32-NEXT: addi a0, sp, 8 1209; RV32-NEXT: vlse64.v v12, (a0), zero 1210; RV32-NEXT: vand.vv v8, v8, v10 1211; RV32-NEXT: vlse64.v v10, (a0), zero 1212; RV32-NEXT: vsrl.vi v14, v8, 1 1213; RV32-NEXT: vand.vv v12, v14, v12 1214; RV32-NEXT: vsub.vv v8, v8, v12 1215; RV32-NEXT: vand.vv v12, v8, v10 1216; RV32-NEXT: vsrl.vi v8, v8, 2 1217; RV32-NEXT: vand.vv v8, v8, v10 1218; RV32-NEXT: vadd.vv v8, v12, v8 1219; RV32-NEXT: vlse64.v v10, (a0), zero 1220; RV32-NEXT: vlse64.v v12, (a0), zero 1221; RV32-NEXT: vsrl.vi v14, v8, 4 1222; RV32-NEXT: vadd.vv v8, v8, v14 1223; RV32-NEXT: vand.vv v8, v8, v10 1224; RV32-NEXT: vmul.vv v8, v8, v12 1225; RV32-NEXT: li a0, 56 1226; 
RV32-NEXT: vsrl.vx v8, v8, a0 1227; RV32-NEXT: addi sp, sp, 16 1228; RV32-NEXT: ret 1229; 1230; RV64-LABEL: cttz_nxv2i64: 1231; RV64: # %bb.0: 1232; RV64-NEXT: li a0, 1 1233; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu 1234; RV64-NEXT: vsub.vx v10, v8, a0 1235; RV64-NEXT: vnot.v v8, v8 1236; RV64-NEXT: vand.vv v8, v8, v10 1237; RV64-NEXT: lui a0, %hi(.LCPI19_0) 1238; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) 1239; RV64-NEXT: lui a1, %hi(.LCPI19_1) 1240; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) 1241; RV64-NEXT: vsrl.vi v10, v8, 1 1242; RV64-NEXT: vand.vx v10, v10, a0 1243; RV64-NEXT: vsub.vv v8, v8, v10 1244; RV64-NEXT: vand.vx v10, v8, a1 1245; RV64-NEXT: vsrl.vi v8, v8, 2 1246; RV64-NEXT: vand.vx v8, v8, a1 1247; RV64-NEXT: vadd.vv v8, v10, v8 1248; RV64-NEXT: lui a0, %hi(.LCPI19_2) 1249; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) 1250; RV64-NEXT: lui a1, %hi(.LCPI19_3) 1251; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) 1252; RV64-NEXT: vsrl.vi v10, v8, 4 1253; RV64-NEXT: vadd.vv v8, v8, v10 1254; RV64-NEXT: vand.vx v8, v8, a0 1255; RV64-NEXT: vmul.vx v8, v8, a1 1256; RV64-NEXT: li a0, 56 1257; RV64-NEXT: vsrl.vx v8, v8, a0 1258; RV64-NEXT: ret 1259 %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false) 1260 ret <vscale x 2 x i64> %a 1261} 1262declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1) 1263 1264define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) { 1265; RV32-LABEL: cttz_nxv4i64: 1266; RV32: # %bb.0: 1267; RV32-NEXT: addi sp, sp, -16 1268; RV32-NEXT: .cfi_def_cfa_offset 16 1269; RV32-NEXT: lui a0, 349525 1270; RV32-NEXT: addi a0, a0, 1365 1271; RV32-NEXT: sw a0, 12(sp) 1272; RV32-NEXT: sw a0, 8(sp) 1273; RV32-NEXT: lui a0, 209715 1274; RV32-NEXT: addi a0, a0, 819 1275; RV32-NEXT: sw a0, 12(sp) 1276; RV32-NEXT: sw a0, 8(sp) 1277; RV32-NEXT: lui a0, 61681 1278; RV32-NEXT: addi a0, a0, -241 1279; RV32-NEXT: sw a0, 12(sp) 1280; RV32-NEXT: sw a0, 8(sp) 1281; RV32-NEXT: lui a0, 4112 1282; RV32-NEXT: addi a0, a0, 257 1283; 
RV32-NEXT: sw a0, 12(sp) 1284; RV32-NEXT: sw a0, 8(sp) 1285; RV32-NEXT: li a0, 1 1286; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, mu 1287; RV32-NEXT: vsub.vx v12, v8, a0 1288; RV32-NEXT: vnot.v v8, v8 1289; RV32-NEXT: addi a0, sp, 8 1290; RV32-NEXT: vlse64.v v16, (a0), zero 1291; RV32-NEXT: vand.vv v8, v8, v12 1292; RV32-NEXT: vlse64.v v12, (a0), zero 1293; RV32-NEXT: vsrl.vi v20, v8, 1 1294; RV32-NEXT: vand.vv v16, v20, v16 1295; RV32-NEXT: vsub.vv v8, v8, v16 1296; RV32-NEXT: vand.vv v16, v8, v12 1297; RV32-NEXT: vsrl.vi v8, v8, 2 1298; RV32-NEXT: vand.vv v8, v8, v12 1299; RV32-NEXT: vadd.vv v8, v16, v8 1300; RV32-NEXT: vlse64.v v12, (a0), zero 1301; RV32-NEXT: vlse64.v v16, (a0), zero 1302; RV32-NEXT: vsrl.vi v20, v8, 4 1303; RV32-NEXT: vadd.vv v8, v8, v20 1304; RV32-NEXT: vand.vv v8, v8, v12 1305; RV32-NEXT: vmul.vv v8, v8, v16 1306; RV32-NEXT: li a0, 56 1307; RV32-NEXT: vsrl.vx v8, v8, a0 1308; RV32-NEXT: addi sp, sp, 16 1309; RV32-NEXT: ret 1310; 1311; RV64-LABEL: cttz_nxv4i64: 1312; RV64: # %bb.0: 1313; RV64-NEXT: li a0, 1 1314; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu 1315; RV64-NEXT: vsub.vx v12, v8, a0 1316; RV64-NEXT: vnot.v v8, v8 1317; RV64-NEXT: vand.vv v8, v8, v12 1318; RV64-NEXT: lui a0, %hi(.LCPI20_0) 1319; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) 1320; RV64-NEXT: lui a1, %hi(.LCPI20_1) 1321; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) 1322; RV64-NEXT: vsrl.vi v12, v8, 1 1323; RV64-NEXT: vand.vx v12, v12, a0 1324; RV64-NEXT: vsub.vv v8, v8, v12 1325; RV64-NEXT: vand.vx v12, v8, a1 1326; RV64-NEXT: vsrl.vi v8, v8, 2 1327; RV64-NEXT: vand.vx v8, v8, a1 1328; RV64-NEXT: vadd.vv v8, v12, v8 1329; RV64-NEXT: lui a0, %hi(.LCPI20_2) 1330; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) 1331; RV64-NEXT: lui a1, %hi(.LCPI20_3) 1332; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) 1333; RV64-NEXT: vsrl.vi v12, v8, 4 1334; RV64-NEXT: vadd.vv v8, v8, v12 1335; RV64-NEXT: vand.vx v8, v8, a0 1336; RV64-NEXT: vmul.vx v8, v8, a1 1337; RV64-NEXT: li a0, 56 1338; RV64-NEXT: vsrl.vx v8, v8, a0 
1339; RV64-NEXT: ret 1340 %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false) 1341 ret <vscale x 4 x i64> %a 1342} 1343declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1) 1344 1345define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) { 1346; RV32-LABEL: cttz_nxv8i64: 1347; RV32: # %bb.0: 1348; RV32-NEXT: addi sp, sp, -16 1349; RV32-NEXT: .cfi_def_cfa_offset 16 1350; RV32-NEXT: lui a0, 349525 1351; RV32-NEXT: addi a0, a0, 1365 1352; RV32-NEXT: sw a0, 12(sp) 1353; RV32-NEXT: sw a0, 8(sp) 1354; RV32-NEXT: lui a0, 209715 1355; RV32-NEXT: addi a0, a0, 819 1356; RV32-NEXT: sw a0, 12(sp) 1357; RV32-NEXT: sw a0, 8(sp) 1358; RV32-NEXT: lui a0, 61681 1359; RV32-NEXT: addi a0, a0, -241 1360; RV32-NEXT: sw a0, 12(sp) 1361; RV32-NEXT: sw a0, 8(sp) 1362; RV32-NEXT: lui a0, 4112 1363; RV32-NEXT: addi a0, a0, 257 1364; RV32-NEXT: sw a0, 12(sp) 1365; RV32-NEXT: sw a0, 8(sp) 1366; RV32-NEXT: li a0, 1 1367; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1368; RV32-NEXT: vsub.vx v16, v8, a0 1369; RV32-NEXT: vnot.v v8, v8 1370; RV32-NEXT: addi a0, sp, 8 1371; RV32-NEXT: vlse64.v v24, (a0), zero 1372; RV32-NEXT: vand.vv v8, v8, v16 1373; RV32-NEXT: vlse64.v v16, (a0), zero 1374; RV32-NEXT: vsrl.vi v0, v8, 1 1375; RV32-NEXT: vand.vv v24, v0, v24 1376; RV32-NEXT: vsub.vv v8, v8, v24 1377; RV32-NEXT: vand.vv v24, v8, v16 1378; RV32-NEXT: vsrl.vi v8, v8, 2 1379; RV32-NEXT: vand.vv v8, v8, v16 1380; RV32-NEXT: vadd.vv v8, v24, v8 1381; RV32-NEXT: vlse64.v v16, (a0), zero 1382; RV32-NEXT: vlse64.v v24, (a0), zero 1383; RV32-NEXT: vsrl.vi v0, v8, 4 1384; RV32-NEXT: vadd.vv v8, v8, v0 1385; RV32-NEXT: vand.vv v8, v8, v16 1386; RV32-NEXT: vmul.vv v8, v8, v24 1387; RV32-NEXT: li a0, 56 1388; RV32-NEXT: vsrl.vx v8, v8, a0 1389; RV32-NEXT: addi sp, sp, 16 1390; RV32-NEXT: ret 1391; 1392; RV64-LABEL: cttz_nxv8i64: 1393; RV64: # %bb.0: 1394; RV64-NEXT: li a0, 1 1395; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 1396; RV64-NEXT: vsub.vx v16, v8, 
a0 1397; RV64-NEXT: vnot.v v8, v8 1398; RV64-NEXT: vand.vv v8, v8, v16 1399; RV64-NEXT: lui a0, %hi(.LCPI21_0) 1400; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) 1401; RV64-NEXT: lui a1, %hi(.LCPI21_1) 1402; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) 1403; RV64-NEXT: vsrl.vi v16, v8, 1 1404; RV64-NEXT: vand.vx v16, v16, a0 1405; RV64-NEXT: vsub.vv v8, v8, v16 1406; RV64-NEXT: vand.vx v16, v8, a1 1407; RV64-NEXT: vsrl.vi v8, v8, 2 1408; RV64-NEXT: vand.vx v8, v8, a1 1409; RV64-NEXT: vadd.vv v8, v16, v8 1410; RV64-NEXT: lui a0, %hi(.LCPI21_2) 1411; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) 1412; RV64-NEXT: lui a1, %hi(.LCPI21_3) 1413; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) 1414; RV64-NEXT: vsrl.vi v16, v8, 4 1415; RV64-NEXT: vadd.vv v8, v8, v16 1416; RV64-NEXT: vand.vx v8, v8, a0 1417; RV64-NEXT: vmul.vx v8, v8, a1 1418; RV64-NEXT: li a0, 56 1419; RV64-NEXT: vsrl.vx v8, v8, a0 1420; RV64-NEXT: ret 1421 %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false) 1422 ret <vscale x 8 x i64> %a 1423} 1424declare <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64>, i1) 1425 1426define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) { 1427; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8: 1428; CHECK-ZVE64X: # %bb.0: 1429; CHECK-ZVE64X-NEXT: li a0, 1 1430; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, mu 1431; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 1432; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1433; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 1434; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1435; CHECK-ZVE64X-NEXT: li a0, 85 1436; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1437; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1438; CHECK-ZVE64X-NEXT: li a0, 51 1439; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1440; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1441; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1442; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1443; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1444; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1445; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1446; CHECK-ZVE64X-NEXT: 
ret 1447; 1448; CHECK-D-LABEL: cttz_zero_undef_nxv1i8: 1449; CHECK-D: # %bb.0: 1450; CHECK-D-NEXT: vsetvli a0, zero, e8, mf8, ta, mu 1451; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1452; CHECK-D-NEXT: vand.vv v8, v8, v9 1453; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 1454; CHECK-D-NEXT: vzext.vf4 v9, v8 1455; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9 1456; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu 1457; CHECK-D-NEXT: vnsrl.wi v8, v8, 23 1458; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu 1459; CHECK-D-NEXT: vncvt.x.x.w v8, v8 1460; CHECK-D-NEXT: li a0, 127 1461; CHECK-D-NEXT: vsub.vx v8, v8, a0 1462; CHECK-D-NEXT: ret 1463 %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true) 1464 ret <vscale x 1 x i8> %a 1465} 1466 1467define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) { 1468; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8: 1469; CHECK-ZVE64X: # %bb.0: 1470; CHECK-ZVE64X-NEXT: li a0, 1 1471; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, mu 1472; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 1473; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1474; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 1475; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1476; CHECK-ZVE64X-NEXT: li a0, 85 1477; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1478; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1479; CHECK-ZVE64X-NEXT: li a0, 51 1480; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1481; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1482; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1483; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1484; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1485; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1486; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1487; CHECK-ZVE64X-NEXT: ret 1488; 1489; CHECK-D-LABEL: cttz_zero_undef_nxv2i8: 1490; CHECK-D: # %bb.0: 1491; CHECK-D-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 1492; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1493; CHECK-D-NEXT: vand.vv v8, v8, v9 1494; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu 1495; CHECK-D-NEXT: vzext.vf4 v9, v8 1496; CHECK-D-NEXT: 
vfcvt.f.xu.v v8, v9 1497; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 1498; CHECK-D-NEXT: vnsrl.wi v8, v8, 23 1499; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 1500; CHECK-D-NEXT: vncvt.x.x.w v8, v8 1501; CHECK-D-NEXT: li a0, 127 1502; CHECK-D-NEXT: vsub.vx v8, v8, a0 1503; CHECK-D-NEXT: ret 1504 %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true) 1505 ret <vscale x 2 x i8> %a 1506} 1507 1508define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) { 1509; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8: 1510; CHECK-ZVE64X: # %bb.0: 1511; CHECK-ZVE64X-NEXT: li a0, 1 1512; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, mu 1513; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 1514; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1515; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 1516; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1517; CHECK-ZVE64X-NEXT: li a0, 85 1518; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1519; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1520; CHECK-ZVE64X-NEXT: li a0, 51 1521; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1522; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1523; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1524; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1525; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1526; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1527; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1528; CHECK-ZVE64X-NEXT: ret 1529; 1530; CHECK-D-LABEL: cttz_zero_undef_nxv4i8: 1531; CHECK-D: # %bb.0: 1532; CHECK-D-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 1533; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1534; CHECK-D-NEXT: vand.vv v8, v8, v9 1535; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu 1536; CHECK-D-NEXT: vzext.vf4 v10, v8 1537; CHECK-D-NEXT: vfcvt.f.xu.v v8, v10 1538; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1539; CHECK-D-NEXT: vnsrl.wi v10, v8, 23 1540; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu 1541; CHECK-D-NEXT: vncvt.x.x.w v8, v10 1542; CHECK-D-NEXT: li a0, 127 1543; CHECK-D-NEXT: vsub.vx v8, v8, a0 1544; CHECK-D-NEXT: ret 1545 %a = call <vscale x 4 
x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true) 1546 ret <vscale x 4 x i8> %a 1547} 1548 1549define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) { 1550; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8: 1551; CHECK-ZVE64X: # %bb.0: 1552; CHECK-ZVE64X-NEXT: li a0, 1 1553; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, mu 1554; CHECK-ZVE64X-NEXT: vsub.vx v9, v8, a0 1555; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1556; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 1557; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1558; CHECK-ZVE64X-NEXT: li a0, 85 1559; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1560; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1561; CHECK-ZVE64X-NEXT: li a0, 51 1562; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1563; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1564; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1565; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1566; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1567; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1568; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1569; CHECK-ZVE64X-NEXT: ret 1570; 1571; CHECK-D-LABEL: cttz_zero_undef_nxv8i8: 1572; CHECK-D: # %bb.0: 1573; CHECK-D-NEXT: vsetvli a0, zero, e8, m1, ta, mu 1574; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1575; CHECK-D-NEXT: vand.vv v8, v8, v9 1576; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1577; CHECK-D-NEXT: vzext.vf4 v12, v8 1578; CHECK-D-NEXT: vfcvt.f.xu.v v8, v12 1579; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1580; CHECK-D-NEXT: vnsrl.wi v12, v8, 23 1581; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu 1582; CHECK-D-NEXT: vncvt.x.x.w v8, v12 1583; CHECK-D-NEXT: li a0, 127 1584; CHECK-D-NEXT: vsub.vx v8, v8, a0 1585; CHECK-D-NEXT: ret 1586 %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true) 1587 ret <vscale x 8 x i8> %a 1588} 1589 1590define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) { 1591; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8: 1592; CHECK-ZVE64X: # %bb.0: 1593; CHECK-ZVE64X-NEXT: li a0, 1 1594; CHECK-ZVE64X-NEXT: vsetvli a1, zero, 
e8, m2, ta, mu 1595; CHECK-ZVE64X-NEXT: vsub.vx v10, v8, a0 1596; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1597; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 1598; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 1599; CHECK-ZVE64X-NEXT: li a0, 85 1600; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 1601; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 1602; CHECK-ZVE64X-NEXT: li a0, 51 1603; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 1604; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1605; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1606; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 1607; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 1608; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 1609; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1610; CHECK-ZVE64X-NEXT: ret 1611; 1612; CHECK-D-LABEL: cttz_zero_undef_nxv16i8: 1613; CHECK-D: # %bb.0: 1614; CHECK-D-NEXT: vsetvli a0, zero, e8, m2, ta, mu 1615; CHECK-D-NEXT: vrsub.vi v10, v8, 0 1616; CHECK-D-NEXT: vand.vv v8, v8, v10 1617; CHECK-D-NEXT: vsetvli zero, zero, e32, m8, ta, mu 1618; CHECK-D-NEXT: vzext.vf4 v16, v8 1619; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16 1620; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu 1621; CHECK-D-NEXT: vnsrl.wi v16, v8, 23 1622; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu 1623; CHECK-D-NEXT: vncvt.x.x.w v8, v16 1624; CHECK-D-NEXT: li a0, 127 1625; CHECK-D-NEXT: vsub.vx v8, v8, a0 1626; CHECK-D-NEXT: ret 1627 %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true) 1628 ret <vscale x 16 x i8> %a 1629} 1630 1631define <vscale x 32 x i8> @cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va) { 1632; CHECK-LABEL: cttz_zero_undef_nxv32i8: 1633; CHECK: # %bb.0: 1634; CHECK-NEXT: li a0, 1 1635; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu 1636; CHECK-NEXT: vsub.vx v12, v8, a0 1637; CHECK-NEXT: vnot.v v8, v8 1638; CHECK-NEXT: vand.vv v8, v8, v12 1639; CHECK-NEXT: vsrl.vi v12, v8, 1 1640; CHECK-NEXT: li a0, 85 1641; CHECK-NEXT: vand.vx v12, v12, a0 1642; CHECK-NEXT: vsub.vv v8, v8, v12 1643; CHECK-NEXT: li a0, 51 1644; CHECK-NEXT: vand.vx v12, v8, a0 1645; 
CHECK-NEXT: vsrl.vi v8, v8, 2 1646; CHECK-NEXT: vand.vx v8, v8, a0 1647; CHECK-NEXT: vadd.vv v8, v12, v8 1648; CHECK-NEXT: vsrl.vi v12, v8, 4 1649; CHECK-NEXT: vadd.vv v8, v8, v12 1650; CHECK-NEXT: vand.vi v8, v8, 15 1651; CHECK-NEXT: ret 1652 %a = call <vscale x 32 x i8> @llvm.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true) 1653 ret <vscale x 32 x i8> %a 1654} 1655 1656define <vscale x 64 x i8> @cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) { 1657; CHECK-LABEL: cttz_zero_undef_nxv64i8: 1658; CHECK: # %bb.0: 1659; CHECK-NEXT: li a0, 1 1660; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu 1661; CHECK-NEXT: vsub.vx v16, v8, a0 1662; CHECK-NEXT: vnot.v v8, v8 1663; CHECK-NEXT: vand.vv v8, v8, v16 1664; CHECK-NEXT: vsrl.vi v16, v8, 1 1665; CHECK-NEXT: li a0, 85 1666; CHECK-NEXT: vand.vx v16, v16, a0 1667; CHECK-NEXT: vsub.vv v8, v8, v16 1668; CHECK-NEXT: li a0, 51 1669; CHECK-NEXT: vand.vx v16, v8, a0 1670; CHECK-NEXT: vsrl.vi v8, v8, 2 1671; CHECK-NEXT: vand.vx v8, v8, a0 1672; CHECK-NEXT: vadd.vv v8, v16, v8 1673; CHECK-NEXT: vsrl.vi v16, v8, 4 1674; CHECK-NEXT: vadd.vv v8, v8, v16 1675; CHECK-NEXT: vand.vi v8, v8, 15 1676; CHECK-NEXT: ret 1677 %a = call <vscale x 64 x i8> @llvm.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true) 1678 ret <vscale x 64 x i8> %a 1679} 1680 1681define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) { 1682; RV32I-LABEL: cttz_zero_undef_nxv1i16: 1683; RV32I: # %bb.0: 1684; RV32I-NEXT: li a0, 1 1685; RV32I-NEXT: vsetvli a1, zero, e16, mf4, ta, mu 1686; RV32I-NEXT: vsub.vx v9, v8, a0 1687; RV32I-NEXT: vnot.v v8, v8 1688; RV32I-NEXT: vand.vv v8, v8, v9 1689; RV32I-NEXT: vsrl.vi v9, v8, 1 1690; RV32I-NEXT: lui a0, 5 1691; RV32I-NEXT: addi a0, a0, 1365 1692; RV32I-NEXT: vand.vx v9, v9, a0 1693; RV32I-NEXT: vsub.vv v8, v8, v9 1694; RV32I-NEXT: lui a0, 3 1695; RV32I-NEXT: addi a0, a0, 819 1696; RV32I-NEXT: vand.vx v9, v8, a0 1697; RV32I-NEXT: vsrl.vi v8, v8, 2 1698; RV32I-NEXT: vand.vx v8, v8, a0 1699; RV32I-NEXT: vadd.vv v8, v9, v8 
1700; RV32I-NEXT: vsrl.vi v9, v8, 4 1701; RV32I-NEXT: vadd.vv v8, v8, v9 1702; RV32I-NEXT: lui a0, 1 1703; RV32I-NEXT: addi a0, a0, -241 1704; RV32I-NEXT: vand.vx v8, v8, a0 1705; RV32I-NEXT: li a0, 257 1706; RV32I-NEXT: vmul.vx v8, v8, a0 1707; RV32I-NEXT: vsrl.vi v8, v8, 8 1708; RV32I-NEXT: ret 1709; 1710; RV64I-LABEL: cttz_zero_undef_nxv1i16: 1711; RV64I: # %bb.0: 1712; RV64I-NEXT: li a0, 1 1713; RV64I-NEXT: vsetvli a1, zero, e16, mf4, ta, mu 1714; RV64I-NEXT: vsub.vx v9, v8, a0 1715; RV64I-NEXT: vnot.v v8, v8 1716; RV64I-NEXT: vand.vv v8, v8, v9 1717; RV64I-NEXT: vsrl.vi v9, v8, 1 1718; RV64I-NEXT: lui a0, 5 1719; RV64I-NEXT: addiw a0, a0, 1365 1720; RV64I-NEXT: vand.vx v9, v9, a0 1721; RV64I-NEXT: vsub.vv v8, v8, v9 1722; RV64I-NEXT: lui a0, 3 1723; RV64I-NEXT: addiw a0, a0, 819 1724; RV64I-NEXT: vand.vx v9, v8, a0 1725; RV64I-NEXT: vsrl.vi v8, v8, 2 1726; RV64I-NEXT: vand.vx v8, v8, a0 1727; RV64I-NEXT: vadd.vv v8, v9, v8 1728; RV64I-NEXT: vsrl.vi v9, v8, 4 1729; RV64I-NEXT: vadd.vv v8, v8, v9 1730; RV64I-NEXT: lui a0, 1 1731; RV64I-NEXT: addiw a0, a0, -241 1732; RV64I-NEXT: vand.vx v8, v8, a0 1733; RV64I-NEXT: li a0, 257 1734; RV64I-NEXT: vmul.vx v8, v8, a0 1735; RV64I-NEXT: vsrl.vi v8, v8, 8 1736; RV64I-NEXT: ret 1737; 1738; CHECK-D-LABEL: cttz_zero_undef_nxv1i16: 1739; CHECK-D: # %bb.0: 1740; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1741; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1742; CHECK-D-NEXT: vand.vv v8, v8, v9 1743; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 1744; CHECK-D-NEXT: vnsrl.wi v8, v9, 23 1745; CHECK-D-NEXT: li a0, 127 1746; CHECK-D-NEXT: vsub.vx v8, v8, a0 1747; CHECK-D-NEXT: ret 1748 %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true) 1749 ret <vscale x 1 x i16> %a 1750} 1751 1752define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) { 1753; RV32I-LABEL: cttz_zero_undef_nxv2i16: 1754; RV32I: # %bb.0: 1755; RV32I-NEXT: li a0, 1 1756; RV32I-NEXT: vsetvli a1, zero, e16, mf2, ta, mu 1757; RV32I-NEXT: 
vsub.vx v9, v8, a0 1758; RV32I-NEXT: vnot.v v8, v8 1759; RV32I-NEXT: vand.vv v8, v8, v9 1760; RV32I-NEXT: vsrl.vi v9, v8, 1 1761; RV32I-NEXT: lui a0, 5 1762; RV32I-NEXT: addi a0, a0, 1365 1763; RV32I-NEXT: vand.vx v9, v9, a0 1764; RV32I-NEXT: vsub.vv v8, v8, v9 1765; RV32I-NEXT: lui a0, 3 1766; RV32I-NEXT: addi a0, a0, 819 1767; RV32I-NEXT: vand.vx v9, v8, a0 1768; RV32I-NEXT: vsrl.vi v8, v8, 2 1769; RV32I-NEXT: vand.vx v8, v8, a0 1770; RV32I-NEXT: vadd.vv v8, v9, v8 1771; RV32I-NEXT: vsrl.vi v9, v8, 4 1772; RV32I-NEXT: vadd.vv v8, v8, v9 1773; RV32I-NEXT: lui a0, 1 1774; RV32I-NEXT: addi a0, a0, -241 1775; RV32I-NEXT: vand.vx v8, v8, a0 1776; RV32I-NEXT: li a0, 257 1777; RV32I-NEXT: vmul.vx v8, v8, a0 1778; RV32I-NEXT: vsrl.vi v8, v8, 8 1779; RV32I-NEXT: ret 1780; 1781; RV64I-LABEL: cttz_zero_undef_nxv2i16: 1782; RV64I: # %bb.0: 1783; RV64I-NEXT: li a0, 1 1784; RV64I-NEXT: vsetvli a1, zero, e16, mf2, ta, mu 1785; RV64I-NEXT: vsub.vx v9, v8, a0 1786; RV64I-NEXT: vnot.v v8, v8 1787; RV64I-NEXT: vand.vv v8, v8, v9 1788; RV64I-NEXT: vsrl.vi v9, v8, 1 1789; RV64I-NEXT: lui a0, 5 1790; RV64I-NEXT: addiw a0, a0, 1365 1791; RV64I-NEXT: vand.vx v9, v9, a0 1792; RV64I-NEXT: vsub.vv v8, v8, v9 1793; RV64I-NEXT: lui a0, 3 1794; RV64I-NEXT: addiw a0, a0, 819 1795; RV64I-NEXT: vand.vx v9, v8, a0 1796; RV64I-NEXT: vsrl.vi v8, v8, 2 1797; RV64I-NEXT: vand.vx v8, v8, a0 1798; RV64I-NEXT: vadd.vv v8, v9, v8 1799; RV64I-NEXT: vsrl.vi v9, v8, 4 1800; RV64I-NEXT: vadd.vv v8, v8, v9 1801; RV64I-NEXT: lui a0, 1 1802; RV64I-NEXT: addiw a0, a0, -241 1803; RV64I-NEXT: vand.vx v8, v8, a0 1804; RV64I-NEXT: li a0, 257 1805; RV64I-NEXT: vmul.vx v8, v8, a0 1806; RV64I-NEXT: vsrl.vi v8, v8, 8 1807; RV64I-NEXT: ret 1808; 1809; CHECK-D-LABEL: cttz_zero_undef_nxv2i16: 1810; CHECK-D: # %bb.0: 1811; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1812; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1813; CHECK-D-NEXT: vand.vv v8, v8, v9 1814; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 1815; CHECK-D-NEXT: vnsrl.wi v8, v9, 
23 1816; CHECK-D-NEXT: li a0, 127 1817; CHECK-D-NEXT: vsub.vx v8, v8, a0 1818; CHECK-D-NEXT: ret 1819 %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true) 1820 ret <vscale x 2 x i16> %a 1821} 1822 1823define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) { 1824; RV32I-LABEL: cttz_zero_undef_nxv4i16: 1825; RV32I: # %bb.0: 1826; RV32I-NEXT: li a0, 1 1827; RV32I-NEXT: vsetvli a1, zero, e16, m1, ta, mu 1828; RV32I-NEXT: vsub.vx v9, v8, a0 1829; RV32I-NEXT: vnot.v v8, v8 1830; RV32I-NEXT: vand.vv v8, v8, v9 1831; RV32I-NEXT: vsrl.vi v9, v8, 1 1832; RV32I-NEXT: lui a0, 5 1833; RV32I-NEXT: addi a0, a0, 1365 1834; RV32I-NEXT: vand.vx v9, v9, a0 1835; RV32I-NEXT: vsub.vv v8, v8, v9 1836; RV32I-NEXT: lui a0, 3 1837; RV32I-NEXT: addi a0, a0, 819 1838; RV32I-NEXT: vand.vx v9, v8, a0 1839; RV32I-NEXT: vsrl.vi v8, v8, 2 1840; RV32I-NEXT: vand.vx v8, v8, a0 1841; RV32I-NEXT: vadd.vv v8, v9, v8 1842; RV32I-NEXT: vsrl.vi v9, v8, 4 1843; RV32I-NEXT: vadd.vv v8, v8, v9 1844; RV32I-NEXT: lui a0, 1 1845; RV32I-NEXT: addi a0, a0, -241 1846; RV32I-NEXT: vand.vx v8, v8, a0 1847; RV32I-NEXT: li a0, 257 1848; RV32I-NEXT: vmul.vx v8, v8, a0 1849; RV32I-NEXT: vsrl.vi v8, v8, 8 1850; RV32I-NEXT: ret 1851; 1852; RV64I-LABEL: cttz_zero_undef_nxv4i16: 1853; RV64I: # %bb.0: 1854; RV64I-NEXT: li a0, 1 1855; RV64I-NEXT: vsetvli a1, zero, e16, m1, ta, mu 1856; RV64I-NEXT: vsub.vx v9, v8, a0 1857; RV64I-NEXT: vnot.v v8, v8 1858; RV64I-NEXT: vand.vv v8, v8, v9 1859; RV64I-NEXT: vsrl.vi v9, v8, 1 1860; RV64I-NEXT: lui a0, 5 1861; RV64I-NEXT: addiw a0, a0, 1365 1862; RV64I-NEXT: vand.vx v9, v9, a0 1863; RV64I-NEXT: vsub.vv v8, v8, v9 1864; RV64I-NEXT: lui a0, 3 1865; RV64I-NEXT: addiw a0, a0, 819 1866; RV64I-NEXT: vand.vx v9, v8, a0 1867; RV64I-NEXT: vsrl.vi v8, v8, 2 1868; RV64I-NEXT: vand.vx v8, v8, a0 1869; RV64I-NEXT: vadd.vv v8, v9, v8 1870; RV64I-NEXT: vsrl.vi v9, v8, 4 1871; RV64I-NEXT: vadd.vv v8, v8, v9 1872; RV64I-NEXT: lui a0, 1 1873; 
RV64I-NEXT: addiw a0, a0, -241 1874; RV64I-NEXT: vand.vx v8, v8, a0 1875; RV64I-NEXT: li a0, 257 1876; RV64I-NEXT: vmul.vx v8, v8, a0 1877; RV64I-NEXT: vsrl.vi v8, v8, 8 1878; RV64I-NEXT: ret 1879; 1880; CHECK-D-LABEL: cttz_zero_undef_nxv4i16: 1881; CHECK-D: # %bb.0: 1882; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu 1883; CHECK-D-NEXT: vrsub.vi v9, v8, 0 1884; CHECK-D-NEXT: vand.vv v8, v8, v9 1885; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8 1886; CHECK-D-NEXT: vnsrl.wi v8, v10, 23 1887; CHECK-D-NEXT: li a0, 127 1888; CHECK-D-NEXT: vsub.vx v8, v8, a0 1889; CHECK-D-NEXT: ret 1890 %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true) 1891 ret <vscale x 4 x i16> %a 1892} 1893 1894define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) { 1895; RV32I-LABEL: cttz_zero_undef_nxv8i16: 1896; RV32I: # %bb.0: 1897; RV32I-NEXT: li a0, 1 1898; RV32I-NEXT: vsetvli a1, zero, e16, m2, ta, mu 1899; RV32I-NEXT: vsub.vx v10, v8, a0 1900; RV32I-NEXT: vnot.v v8, v8 1901; RV32I-NEXT: vand.vv v8, v8, v10 1902; RV32I-NEXT: vsrl.vi v10, v8, 1 1903; RV32I-NEXT: lui a0, 5 1904; RV32I-NEXT: addi a0, a0, 1365 1905; RV32I-NEXT: vand.vx v10, v10, a0 1906; RV32I-NEXT: vsub.vv v8, v8, v10 1907; RV32I-NEXT: lui a0, 3 1908; RV32I-NEXT: addi a0, a0, 819 1909; RV32I-NEXT: vand.vx v10, v8, a0 1910; RV32I-NEXT: vsrl.vi v8, v8, 2 1911; RV32I-NEXT: vand.vx v8, v8, a0 1912; RV32I-NEXT: vadd.vv v8, v10, v8 1913; RV32I-NEXT: vsrl.vi v10, v8, 4 1914; RV32I-NEXT: vadd.vv v8, v8, v10 1915; RV32I-NEXT: lui a0, 1 1916; RV32I-NEXT: addi a0, a0, -241 1917; RV32I-NEXT: vand.vx v8, v8, a0 1918; RV32I-NEXT: li a0, 257 1919; RV32I-NEXT: vmul.vx v8, v8, a0 1920; RV32I-NEXT: vsrl.vi v8, v8, 8 1921; RV32I-NEXT: ret 1922; 1923; RV64I-LABEL: cttz_zero_undef_nxv8i16: 1924; RV64I: # %bb.0: 1925; RV64I-NEXT: li a0, 1 1926; RV64I-NEXT: vsetvli a1, zero, e16, m2, ta, mu 1927; RV64I-NEXT: vsub.vx v10, v8, a0 1928; RV64I-NEXT: vnot.v v8, v8 1929; RV64I-NEXT: vand.vv v8, v8, v10 1930; 
RV64I-NEXT: vsrl.vi v10, v8, 1 1931; RV64I-NEXT: lui a0, 5 1932; RV64I-NEXT: addiw a0, a0, 1365 1933; RV64I-NEXT: vand.vx v10, v10, a0 1934; RV64I-NEXT: vsub.vv v8, v8, v10 1935; RV64I-NEXT: lui a0, 3 1936; RV64I-NEXT: addiw a0, a0, 819 1937; RV64I-NEXT: vand.vx v10, v8, a0 1938; RV64I-NEXT: vsrl.vi v8, v8, 2 1939; RV64I-NEXT: vand.vx v8, v8, a0 1940; RV64I-NEXT: vadd.vv v8, v10, v8 1941; RV64I-NEXT: vsrl.vi v10, v8, 4 1942; RV64I-NEXT: vadd.vv v8, v8, v10 1943; RV64I-NEXT: lui a0, 1 1944; RV64I-NEXT: addiw a0, a0, -241 1945; RV64I-NEXT: vand.vx v8, v8, a0 1946; RV64I-NEXT: li a0, 257 1947; RV64I-NEXT: vmul.vx v8, v8, a0 1948; RV64I-NEXT: vsrl.vi v8, v8, 8 1949; RV64I-NEXT: ret 1950; 1951; CHECK-D-LABEL: cttz_zero_undef_nxv8i16: 1952; CHECK-D: # %bb.0: 1953; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu 1954; CHECK-D-NEXT: vrsub.vi v10, v8, 0 1955; CHECK-D-NEXT: vand.vv v8, v8, v10 1956; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8 1957; CHECK-D-NEXT: vnsrl.wi v8, v12, 23 1958; CHECK-D-NEXT: li a0, 127 1959; CHECK-D-NEXT: vsub.vx v8, v8, a0 1960; CHECK-D-NEXT: ret 1961 %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true) 1962 ret <vscale x 8 x i16> %a 1963} 1964 1965define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) { 1966; RV32I-LABEL: cttz_zero_undef_nxv16i16: 1967; RV32I: # %bb.0: 1968; RV32I-NEXT: li a0, 1 1969; RV32I-NEXT: vsetvli a1, zero, e16, m4, ta, mu 1970; RV32I-NEXT: vsub.vx v12, v8, a0 1971; RV32I-NEXT: vnot.v v8, v8 1972; RV32I-NEXT: vand.vv v8, v8, v12 1973; RV32I-NEXT: vsrl.vi v12, v8, 1 1974; RV32I-NEXT: lui a0, 5 1975; RV32I-NEXT: addi a0, a0, 1365 1976; RV32I-NEXT: vand.vx v12, v12, a0 1977; RV32I-NEXT: vsub.vv v8, v8, v12 1978; RV32I-NEXT: lui a0, 3 1979; RV32I-NEXT: addi a0, a0, 819 1980; RV32I-NEXT: vand.vx v12, v8, a0 1981; RV32I-NEXT: vsrl.vi v8, v8, 2 1982; RV32I-NEXT: vand.vx v8, v8, a0 1983; RV32I-NEXT: vadd.vv v8, v12, v8 1984; RV32I-NEXT: vsrl.vi v12, v8, 4 1985; RV32I-NEXT: vadd.vv v8, 
v8, v12 1986; RV32I-NEXT: lui a0, 1 1987; RV32I-NEXT: addi a0, a0, -241 1988; RV32I-NEXT: vand.vx v8, v8, a0 1989; RV32I-NEXT: li a0, 257 1990; RV32I-NEXT: vmul.vx v8, v8, a0 1991; RV32I-NEXT: vsrl.vi v8, v8, 8 1992; RV32I-NEXT: ret 1993; 1994; RV64I-LABEL: cttz_zero_undef_nxv16i16: 1995; RV64I: # %bb.0: 1996; RV64I-NEXT: li a0, 1 1997; RV64I-NEXT: vsetvli a1, zero, e16, m4, ta, mu 1998; RV64I-NEXT: vsub.vx v12, v8, a0 1999; RV64I-NEXT: vnot.v v8, v8 2000; RV64I-NEXT: vand.vv v8, v8, v12 2001; RV64I-NEXT: vsrl.vi v12, v8, 1 2002; RV64I-NEXT: lui a0, 5 2003; RV64I-NEXT: addiw a0, a0, 1365 2004; RV64I-NEXT: vand.vx v12, v12, a0 2005; RV64I-NEXT: vsub.vv v8, v8, v12 2006; RV64I-NEXT: lui a0, 3 2007; RV64I-NEXT: addiw a0, a0, 819 2008; RV64I-NEXT: vand.vx v12, v8, a0 2009; RV64I-NEXT: vsrl.vi v8, v8, 2 2010; RV64I-NEXT: vand.vx v8, v8, a0 2011; RV64I-NEXT: vadd.vv v8, v12, v8 2012; RV64I-NEXT: vsrl.vi v12, v8, 4 2013; RV64I-NEXT: vadd.vv v8, v8, v12 2014; RV64I-NEXT: lui a0, 1 2015; RV64I-NEXT: addiw a0, a0, -241 2016; RV64I-NEXT: vand.vx v8, v8, a0 2017; RV64I-NEXT: li a0, 257 2018; RV64I-NEXT: vmul.vx v8, v8, a0 2019; RV64I-NEXT: vsrl.vi v8, v8, 8 2020; RV64I-NEXT: ret 2021; 2022; CHECK-D-LABEL: cttz_zero_undef_nxv16i16: 2023; CHECK-D: # %bb.0: 2024; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu 2025; CHECK-D-NEXT: vrsub.vi v12, v8, 0 2026; CHECK-D-NEXT: vand.vv v8, v8, v12 2027; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 2028; CHECK-D-NEXT: vnsrl.wi v8, v16, 23 2029; CHECK-D-NEXT: li a0, 127 2030; CHECK-D-NEXT: vsub.vx v8, v8, a0 2031; CHECK-D-NEXT: ret 2032 %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true) 2033 ret <vscale x 16 x i16> %a 2034} 2035 2036define <vscale x 32 x i16> @cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) { 2037; RV32-LABEL: cttz_zero_undef_nxv32i16: 2038; RV32: # %bb.0: 2039; RV32-NEXT: li a0, 1 2040; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, mu 2041; RV32-NEXT: vsub.vx v16, v8, a0 2042; RV32-NEXT: vnot.v v8, v8 
2043; RV32-NEXT: vand.vv v8, v8, v16 2044; RV32-NEXT: vsrl.vi v16, v8, 1 2045; RV32-NEXT: lui a0, 5 2046; RV32-NEXT: addi a0, a0, 1365 2047; RV32-NEXT: vand.vx v16, v16, a0 2048; RV32-NEXT: vsub.vv v8, v8, v16 2049; RV32-NEXT: lui a0, 3 2050; RV32-NEXT: addi a0, a0, 819 2051; RV32-NEXT: vand.vx v16, v8, a0 2052; RV32-NEXT: vsrl.vi v8, v8, 2 2053; RV32-NEXT: vand.vx v8, v8, a0 2054; RV32-NEXT: vadd.vv v8, v16, v8 2055; RV32-NEXT: vsrl.vi v16, v8, 4 2056; RV32-NEXT: vadd.vv v8, v8, v16 2057; RV32-NEXT: lui a0, 1 2058; RV32-NEXT: addi a0, a0, -241 2059; RV32-NEXT: vand.vx v8, v8, a0 2060; RV32-NEXT: li a0, 257 2061; RV32-NEXT: vmul.vx v8, v8, a0 2062; RV32-NEXT: vsrl.vi v8, v8, 8 2063; RV32-NEXT: ret 2064; 2065; RV64-LABEL: cttz_zero_undef_nxv32i16: 2066; RV64: # %bb.0: 2067; RV64-NEXT: li a0, 1 2068; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, mu 2069; RV64-NEXT: vsub.vx v16, v8, a0 2070; RV64-NEXT: vnot.v v8, v8 2071; RV64-NEXT: vand.vv v8, v8, v16 2072; RV64-NEXT: vsrl.vi v16, v8, 1 2073; RV64-NEXT: lui a0, 5 2074; RV64-NEXT: addiw a0, a0, 1365 2075; RV64-NEXT: vand.vx v16, v16, a0 2076; RV64-NEXT: vsub.vv v8, v8, v16 2077; RV64-NEXT: lui a0, 3 2078; RV64-NEXT: addiw a0, a0, 819 2079; RV64-NEXT: vand.vx v16, v8, a0 2080; RV64-NEXT: vsrl.vi v8, v8, 2 2081; RV64-NEXT: vand.vx v8, v8, a0 2082; RV64-NEXT: vadd.vv v8, v16, v8 2083; RV64-NEXT: vsrl.vi v16, v8, 4 2084; RV64-NEXT: vadd.vv v8, v8, v16 2085; RV64-NEXT: lui a0, 1 2086; RV64-NEXT: addiw a0, a0, -241 2087; RV64-NEXT: vand.vx v8, v8, a0 2088; RV64-NEXT: li a0, 257 2089; RV64-NEXT: vmul.vx v8, v8, a0 2090; RV64-NEXT: vsrl.vi v8, v8, 8 2091; RV64-NEXT: ret 2092 %a = call <vscale x 32 x i16> @llvm.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true) 2093 ret <vscale x 32 x i16> %a 2094} 2095 2096define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) { 2097; RV32I-LABEL: cttz_zero_undef_nxv1i32: 2098; RV32I: # %bb.0: 2099; RV32I-NEXT: li a0, 1 2100; RV32I-NEXT: vsetvli a1, zero, e32, mf2, ta, mu 2101; 
RV32I-NEXT: vsub.vx v9, v8, a0 2102; RV32I-NEXT: vnot.v v8, v8 2103; RV32I-NEXT: vand.vv v8, v8, v9 2104; RV32I-NEXT: vsrl.vi v9, v8, 1 2105; RV32I-NEXT: lui a0, 349525 2106; RV32I-NEXT: addi a0, a0, 1365 2107; RV32I-NEXT: vand.vx v9, v9, a0 2108; RV32I-NEXT: vsub.vv v8, v8, v9 2109; RV32I-NEXT: lui a0, 209715 2110; RV32I-NEXT: addi a0, a0, 819 2111; RV32I-NEXT: vand.vx v9, v8, a0 2112; RV32I-NEXT: vsrl.vi v8, v8, 2 2113; RV32I-NEXT: vand.vx v8, v8, a0 2114; RV32I-NEXT: vadd.vv v8, v9, v8 2115; RV32I-NEXT: vsrl.vi v9, v8, 4 2116; RV32I-NEXT: vadd.vv v8, v8, v9 2117; RV32I-NEXT: lui a0, 61681 2118; RV32I-NEXT: addi a0, a0, -241 2119; RV32I-NEXT: vand.vx v8, v8, a0 2120; RV32I-NEXT: lui a0, 4112 2121; RV32I-NEXT: addi a0, a0, 257 2122; RV32I-NEXT: vmul.vx v8, v8, a0 2123; RV32I-NEXT: vsrl.vi v8, v8, 24 2124; RV32I-NEXT: ret 2125; 2126; RV64I-LABEL: cttz_zero_undef_nxv1i32: 2127; RV64I: # %bb.0: 2128; RV64I-NEXT: li a0, 1 2129; RV64I-NEXT: vsetvli a1, zero, e32, mf2, ta, mu 2130; RV64I-NEXT: vsub.vx v9, v8, a0 2131; RV64I-NEXT: vnot.v v8, v8 2132; RV64I-NEXT: vand.vv v8, v8, v9 2133; RV64I-NEXT: vsrl.vi v9, v8, 1 2134; RV64I-NEXT: lui a0, 349525 2135; RV64I-NEXT: addiw a0, a0, 1365 2136; RV64I-NEXT: vand.vx v9, v9, a0 2137; RV64I-NEXT: vsub.vv v8, v8, v9 2138; RV64I-NEXT: lui a0, 209715 2139; RV64I-NEXT: addiw a0, a0, 819 2140; RV64I-NEXT: vand.vx v9, v8, a0 2141; RV64I-NEXT: vsrl.vi v8, v8, 2 2142; RV64I-NEXT: vand.vx v8, v8, a0 2143; RV64I-NEXT: vadd.vv v8, v9, v8 2144; RV64I-NEXT: vsrl.vi v9, v8, 4 2145; RV64I-NEXT: vadd.vv v8, v8, v9 2146; RV64I-NEXT: lui a0, 61681 2147; RV64I-NEXT: addiw a0, a0, -241 2148; RV64I-NEXT: vand.vx v8, v8, a0 2149; RV64I-NEXT: lui a0, 4112 2150; RV64I-NEXT: addiw a0, a0, 257 2151; RV64I-NEXT: vmul.vx v8, v8, a0 2152; RV64I-NEXT: vsrl.vi v8, v8, 24 2153; RV64I-NEXT: ret 2154; 2155; CHECK-D-LABEL: cttz_zero_undef_nxv1i32: 2156; CHECK-D: # %bb.0: 2157; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 2158; CHECK-D-NEXT: vrsub.vi v9, v8, 0 
2159; CHECK-D-NEXT: vand.vv v8, v8, v9 2160; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 2161; CHECK-D-NEXT: li a0, 52 2162; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu 2163; CHECK-D-NEXT: vsrl.vx v8, v9, a0 2164; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 2165; CHECK-D-NEXT: vncvt.x.x.w v8, v8 2166; CHECK-D-NEXT: li a0, 1023 2167; CHECK-D-NEXT: vsub.vx v8, v8, a0 2168; CHECK-D-NEXT: ret 2169 %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true) 2170 ret <vscale x 1 x i32> %a 2171} 2172 2173define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) { 2174; RV32I-LABEL: cttz_zero_undef_nxv2i32: 2175; RV32I: # %bb.0: 2176; RV32I-NEXT: li a0, 1 2177; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, mu 2178; RV32I-NEXT: vsub.vx v9, v8, a0 2179; RV32I-NEXT: vnot.v v8, v8 2180; RV32I-NEXT: vand.vv v8, v8, v9 2181; RV32I-NEXT: vsrl.vi v9, v8, 1 2182; RV32I-NEXT: lui a0, 349525 2183; RV32I-NEXT: addi a0, a0, 1365 2184; RV32I-NEXT: vand.vx v9, v9, a0 2185; RV32I-NEXT: vsub.vv v8, v8, v9 2186; RV32I-NEXT: lui a0, 209715 2187; RV32I-NEXT: addi a0, a0, 819 2188; RV32I-NEXT: vand.vx v9, v8, a0 2189; RV32I-NEXT: vsrl.vi v8, v8, 2 2190; RV32I-NEXT: vand.vx v8, v8, a0 2191; RV32I-NEXT: vadd.vv v8, v9, v8 2192; RV32I-NEXT: vsrl.vi v9, v8, 4 2193; RV32I-NEXT: vadd.vv v8, v8, v9 2194; RV32I-NEXT: lui a0, 61681 2195; RV32I-NEXT: addi a0, a0, -241 2196; RV32I-NEXT: vand.vx v8, v8, a0 2197; RV32I-NEXT: lui a0, 4112 2198; RV32I-NEXT: addi a0, a0, 257 2199; RV32I-NEXT: vmul.vx v8, v8, a0 2200; RV32I-NEXT: vsrl.vi v8, v8, 24 2201; RV32I-NEXT: ret 2202; 2203; RV64I-LABEL: cttz_zero_undef_nxv2i32: 2204; RV64I: # %bb.0: 2205; RV64I-NEXT: li a0, 1 2206; RV64I-NEXT: vsetvli a1, zero, e32, m1, ta, mu 2207; RV64I-NEXT: vsub.vx v9, v8, a0 2208; RV64I-NEXT: vnot.v v8, v8 2209; RV64I-NEXT: vand.vv v8, v8, v9 2210; RV64I-NEXT: vsrl.vi v9, v8, 1 2211; RV64I-NEXT: lui a0, 349525 2212; RV64I-NEXT: addiw a0, a0, 1365 2213; RV64I-NEXT: vand.vx v9, v9, a0 2214; 
RV64I-NEXT: vsub.vv v8, v8, v9 2215; RV64I-NEXT: lui a0, 209715 2216; RV64I-NEXT: addiw a0, a0, 819 2217; RV64I-NEXT: vand.vx v9, v8, a0 2218; RV64I-NEXT: vsrl.vi v8, v8, 2 2219; RV64I-NEXT: vand.vx v8, v8, a0 2220; RV64I-NEXT: vadd.vv v8, v9, v8 2221; RV64I-NEXT: vsrl.vi v9, v8, 4 2222; RV64I-NEXT: vadd.vv v8, v8, v9 2223; RV64I-NEXT: lui a0, 61681 2224; RV64I-NEXT: addiw a0, a0, -241 2225; RV64I-NEXT: vand.vx v8, v8, a0 2226; RV64I-NEXT: lui a0, 4112 2227; RV64I-NEXT: addiw a0, a0, 257 2228; RV64I-NEXT: vmul.vx v8, v8, a0 2229; RV64I-NEXT: vsrl.vi v8, v8, 24 2230; RV64I-NEXT: ret 2231; 2232; CHECK-D-LABEL: cttz_zero_undef_nxv2i32: 2233; CHECK-D: # %bb.0: 2234; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu 2235; CHECK-D-NEXT: vrsub.vi v9, v8, 0 2236; CHECK-D-NEXT: vand.vv v8, v8, v9 2237; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8 2238; CHECK-D-NEXT: li a0, 52 2239; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu 2240; CHECK-D-NEXT: vsrl.vx v8, v10, a0 2241; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu 2242; CHECK-D-NEXT: vncvt.x.x.w v10, v8 2243; CHECK-D-NEXT: li a0, 1023 2244; CHECK-D-NEXT: vsub.vx v8, v10, a0 2245; CHECK-D-NEXT: ret 2246 %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true) 2247 ret <vscale x 2 x i32> %a 2248} 2249 2250define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) { 2251; RV32I-LABEL: cttz_zero_undef_nxv4i32: 2252; RV32I: # %bb.0: 2253; RV32I-NEXT: li a0, 1 2254; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, mu 2255; RV32I-NEXT: vsub.vx v10, v8, a0 2256; RV32I-NEXT: vnot.v v8, v8 2257; RV32I-NEXT: vand.vv v8, v8, v10 2258; RV32I-NEXT: vsrl.vi v10, v8, 1 2259; RV32I-NEXT: lui a0, 349525 2260; RV32I-NEXT: addi a0, a0, 1365 2261; RV32I-NEXT: vand.vx v10, v10, a0 2262; RV32I-NEXT: vsub.vv v8, v8, v10 2263; RV32I-NEXT: lui a0, 209715 2264; RV32I-NEXT: addi a0, a0, 819 2265; RV32I-NEXT: vand.vx v10, v8, a0 2266; RV32I-NEXT: vsrl.vi v8, v8, 2 2267; RV32I-NEXT: vand.vx v8, v8, a0 2268; RV32I-NEXT: 
vadd.vv v8, v10, v8 2269; RV32I-NEXT: vsrl.vi v10, v8, 4 2270; RV32I-NEXT: vadd.vv v8, v8, v10 2271; RV32I-NEXT: lui a0, 61681 2272; RV32I-NEXT: addi a0, a0, -241 2273; RV32I-NEXT: vand.vx v8, v8, a0 2274; RV32I-NEXT: lui a0, 4112 2275; RV32I-NEXT: addi a0, a0, 257 2276; RV32I-NEXT: vmul.vx v8, v8, a0 2277; RV32I-NEXT: vsrl.vi v8, v8, 24 2278; RV32I-NEXT: ret 2279; 2280; RV64I-LABEL: cttz_zero_undef_nxv4i32: 2281; RV64I: # %bb.0: 2282; RV64I-NEXT: li a0, 1 2283; RV64I-NEXT: vsetvli a1, zero, e32, m2, ta, mu 2284; RV64I-NEXT: vsub.vx v10, v8, a0 2285; RV64I-NEXT: vnot.v v8, v8 2286; RV64I-NEXT: vand.vv v8, v8, v10 2287; RV64I-NEXT: vsrl.vi v10, v8, 1 2288; RV64I-NEXT: lui a0, 349525 2289; RV64I-NEXT: addiw a0, a0, 1365 2290; RV64I-NEXT: vand.vx v10, v10, a0 2291; RV64I-NEXT: vsub.vv v8, v8, v10 2292; RV64I-NEXT: lui a0, 209715 2293; RV64I-NEXT: addiw a0, a0, 819 2294; RV64I-NEXT: vand.vx v10, v8, a0 2295; RV64I-NEXT: vsrl.vi v8, v8, 2 2296; RV64I-NEXT: vand.vx v8, v8, a0 2297; RV64I-NEXT: vadd.vv v8, v10, v8 2298; RV64I-NEXT: vsrl.vi v10, v8, 4 2299; RV64I-NEXT: vadd.vv v8, v8, v10 2300; RV64I-NEXT: lui a0, 61681 2301; RV64I-NEXT: addiw a0, a0, -241 2302; RV64I-NEXT: vand.vx v8, v8, a0 2303; RV64I-NEXT: lui a0, 4112 2304; RV64I-NEXT: addiw a0, a0, 257 2305; RV64I-NEXT: vmul.vx v8, v8, a0 2306; RV64I-NEXT: vsrl.vi v8, v8, 24 2307; RV64I-NEXT: ret 2308; 2309; CHECK-D-LABEL: cttz_zero_undef_nxv4i32: 2310; CHECK-D: # %bb.0: 2311; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu 2312; CHECK-D-NEXT: vrsub.vi v10, v8, 0 2313; CHECK-D-NEXT: vand.vv v8, v8, v10 2314; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8 2315; CHECK-D-NEXT: li a0, 52 2316; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu 2317; CHECK-D-NEXT: vsrl.vx v8, v12, a0 2318; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2319; CHECK-D-NEXT: vncvt.x.x.w v12, v8 2320; CHECK-D-NEXT: li a0, 1023 2321; CHECK-D-NEXT: vsub.vx v8, v12, a0 2322; CHECK-D-NEXT: ret 2323 %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 
4 x i32> %va, i1 true) 2324 ret <vscale x 4 x i32> %a 2325} 2326 2327define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) { 2328; RV32I-LABEL: cttz_zero_undef_nxv8i32: 2329; RV32I: # %bb.0: 2330; RV32I-NEXT: li a0, 1 2331; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, mu 2332; RV32I-NEXT: vsub.vx v12, v8, a0 2333; RV32I-NEXT: vnot.v v8, v8 2334; RV32I-NEXT: vand.vv v8, v8, v12 2335; RV32I-NEXT: vsrl.vi v12, v8, 1 2336; RV32I-NEXT: lui a0, 349525 2337; RV32I-NEXT: addi a0, a0, 1365 2338; RV32I-NEXT: vand.vx v12, v12, a0 2339; RV32I-NEXT: vsub.vv v8, v8, v12 2340; RV32I-NEXT: lui a0, 209715 2341; RV32I-NEXT: addi a0, a0, 819 2342; RV32I-NEXT: vand.vx v12, v8, a0 2343; RV32I-NEXT: vsrl.vi v8, v8, 2 2344; RV32I-NEXT: vand.vx v8, v8, a0 2345; RV32I-NEXT: vadd.vv v8, v12, v8 2346; RV32I-NEXT: vsrl.vi v12, v8, 4 2347; RV32I-NEXT: vadd.vv v8, v8, v12 2348; RV32I-NEXT: lui a0, 61681 2349; RV32I-NEXT: addi a0, a0, -241 2350; RV32I-NEXT: vand.vx v8, v8, a0 2351; RV32I-NEXT: lui a0, 4112 2352; RV32I-NEXT: addi a0, a0, 257 2353; RV32I-NEXT: vmul.vx v8, v8, a0 2354; RV32I-NEXT: vsrl.vi v8, v8, 24 2355; RV32I-NEXT: ret 2356; 2357; RV64I-LABEL: cttz_zero_undef_nxv8i32: 2358; RV64I: # %bb.0: 2359; RV64I-NEXT: li a0, 1 2360; RV64I-NEXT: vsetvli a1, zero, e32, m4, ta, mu 2361; RV64I-NEXT: vsub.vx v12, v8, a0 2362; RV64I-NEXT: vnot.v v8, v8 2363; RV64I-NEXT: vand.vv v8, v8, v12 2364; RV64I-NEXT: vsrl.vi v12, v8, 1 2365; RV64I-NEXT: lui a0, 349525 2366; RV64I-NEXT: addiw a0, a0, 1365 2367; RV64I-NEXT: vand.vx v12, v12, a0 2368; RV64I-NEXT: vsub.vv v8, v8, v12 2369; RV64I-NEXT: lui a0, 209715 2370; RV64I-NEXT: addiw a0, a0, 819 2371; RV64I-NEXT: vand.vx v12, v8, a0 2372; RV64I-NEXT: vsrl.vi v8, v8, 2 2373; RV64I-NEXT: vand.vx v8, v8, a0 2374; RV64I-NEXT: vadd.vv v8, v12, v8 2375; RV64I-NEXT: vsrl.vi v12, v8, 4 2376; RV64I-NEXT: vadd.vv v8, v8, v12 2377; RV64I-NEXT: lui a0, 61681 2378; RV64I-NEXT: addiw a0, a0, -241 2379; RV64I-NEXT: vand.vx v8, v8, a0 2380; 
RV64I-NEXT: lui a0, 4112 2381; RV64I-NEXT: addiw a0, a0, 257 2382; RV64I-NEXT: vmul.vx v8, v8, a0 2383; RV64I-NEXT: vsrl.vi v8, v8, 24 2384; RV64I-NEXT: ret 2385; 2386; CHECK-D-LABEL: cttz_zero_undef_nxv8i32: 2387; CHECK-D: # %bb.0: 2388; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu 2389; CHECK-D-NEXT: vrsub.vi v12, v8, 0 2390; CHECK-D-NEXT: vand.vv v8, v8, v12 2391; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 2392; CHECK-D-NEXT: li a0, 52 2393; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2394; CHECK-D-NEXT: vsrl.vx v8, v16, a0 2395; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu 2396; CHECK-D-NEXT: vncvt.x.x.w v16, v8 2397; CHECK-D-NEXT: li a0, 1023 2398; CHECK-D-NEXT: vsub.vx v8, v16, a0 2399; CHECK-D-NEXT: ret 2400 %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true) 2401 ret <vscale x 8 x i32> %a 2402} 2403 2404define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) { 2405; RV32-LABEL: cttz_zero_undef_nxv16i32: 2406; RV32: # %bb.0: 2407; RV32-NEXT: li a0, 1 2408; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, mu 2409; RV32-NEXT: vsub.vx v16, v8, a0 2410; RV32-NEXT: vnot.v v8, v8 2411; RV32-NEXT: vand.vv v8, v8, v16 2412; RV32-NEXT: vsrl.vi v16, v8, 1 2413; RV32-NEXT: lui a0, 349525 2414; RV32-NEXT: addi a0, a0, 1365 2415; RV32-NEXT: vand.vx v16, v16, a0 2416; RV32-NEXT: vsub.vv v8, v8, v16 2417; RV32-NEXT: lui a0, 209715 2418; RV32-NEXT: addi a0, a0, 819 2419; RV32-NEXT: vand.vx v16, v8, a0 2420; RV32-NEXT: vsrl.vi v8, v8, 2 2421; RV32-NEXT: vand.vx v8, v8, a0 2422; RV32-NEXT: vadd.vv v8, v16, v8 2423; RV32-NEXT: vsrl.vi v16, v8, 4 2424; RV32-NEXT: vadd.vv v8, v8, v16 2425; RV32-NEXT: lui a0, 61681 2426; RV32-NEXT: addi a0, a0, -241 2427; RV32-NEXT: vand.vx v8, v8, a0 2428; RV32-NEXT: lui a0, 4112 2429; RV32-NEXT: addi a0, a0, 257 2430; RV32-NEXT: vmul.vx v8, v8, a0 2431; RV32-NEXT: vsrl.vi v8, v8, 24 2432; RV32-NEXT: ret 2433; 2434; RV64-LABEL: cttz_zero_undef_nxv16i32: 2435; RV64: # %bb.0: 2436; RV64-NEXT: li 
a0, 1 2437; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, mu 2438; RV64-NEXT: vsub.vx v16, v8, a0 2439; RV64-NEXT: vnot.v v8, v8 2440; RV64-NEXT: vand.vv v8, v8, v16 2441; RV64-NEXT: vsrl.vi v16, v8, 1 2442; RV64-NEXT: lui a0, 349525 2443; RV64-NEXT: addiw a0, a0, 1365 2444; RV64-NEXT: vand.vx v16, v16, a0 2445; RV64-NEXT: vsub.vv v8, v8, v16 2446; RV64-NEXT: lui a0, 209715 2447; RV64-NEXT: addiw a0, a0, 819 2448; RV64-NEXT: vand.vx v16, v8, a0 2449; RV64-NEXT: vsrl.vi v8, v8, 2 2450; RV64-NEXT: vand.vx v8, v8, a0 2451; RV64-NEXT: vadd.vv v8, v16, v8 2452; RV64-NEXT: vsrl.vi v16, v8, 4 2453; RV64-NEXT: vadd.vv v8, v8, v16 2454; RV64-NEXT: lui a0, 61681 2455; RV64-NEXT: addiw a0, a0, -241 2456; RV64-NEXT: vand.vx v8, v8, a0 2457; RV64-NEXT: lui a0, 4112 2458; RV64-NEXT: addiw a0, a0, 257 2459; RV64-NEXT: vmul.vx v8, v8, a0 2460; RV64-NEXT: vsrl.vi v8, v8, 24 2461; RV64-NEXT: ret 2462 %a = call <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true) 2463 ret <vscale x 16 x i32> %a 2464} 2465 2466define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) { 2467; RV32-LABEL: cttz_zero_undef_nxv1i64: 2468; RV32: # %bb.0: 2469; RV32-NEXT: addi sp, sp, -16 2470; RV32-NEXT: .cfi_def_cfa_offset 16 2471; RV32-NEXT: lui a0, 349525 2472; RV32-NEXT: addi a0, a0, 1365 2473; RV32-NEXT: sw a0, 12(sp) 2474; RV32-NEXT: sw a0, 8(sp) 2475; RV32-NEXT: lui a0, 209715 2476; RV32-NEXT: addi a0, a0, 819 2477; RV32-NEXT: sw a0, 12(sp) 2478; RV32-NEXT: sw a0, 8(sp) 2479; RV32-NEXT: lui a0, 61681 2480; RV32-NEXT: addi a0, a0, -241 2481; RV32-NEXT: sw a0, 12(sp) 2482; RV32-NEXT: sw a0, 8(sp) 2483; RV32-NEXT: lui a0, 4112 2484; RV32-NEXT: addi a0, a0, 257 2485; RV32-NEXT: sw a0, 12(sp) 2486; RV32-NEXT: sw a0, 8(sp) 2487; RV32-NEXT: li a0, 1 2488; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, mu 2489; RV32-NEXT: vsub.vx v9, v8, a0 2490; RV32-NEXT: vnot.v v8, v8 2491; RV32-NEXT: addi a0, sp, 8 2492; RV32-NEXT: vlse64.v v10, (a0), zero 2493; RV32-NEXT: vand.vv v8, v8, v9 
2494; RV32-NEXT: vlse64.v v9, (a0), zero 2495; RV32-NEXT: vsrl.vi v11, v8, 1 2496; RV32-NEXT: vand.vv v10, v11, v10 2497; RV32-NEXT: vsub.vv v8, v8, v10 2498; RV32-NEXT: vand.vv v10, v8, v9 2499; RV32-NEXT: vsrl.vi v8, v8, 2 2500; RV32-NEXT: vand.vv v8, v8, v9 2501; RV32-NEXT: vadd.vv v8, v10, v8 2502; RV32-NEXT: vlse64.v v9, (a0), zero 2503; RV32-NEXT: vlse64.v v10, (a0), zero 2504; RV32-NEXT: vsrl.vi v11, v8, 4 2505; RV32-NEXT: vadd.vv v8, v8, v11 2506; RV32-NEXT: vand.vv v8, v8, v9 2507; RV32-NEXT: vmul.vv v8, v8, v10 2508; RV32-NEXT: li a0, 56 2509; RV32-NEXT: vsrl.vx v8, v8, a0 2510; RV32-NEXT: addi sp, sp, 16 2511; RV32-NEXT: ret 2512; 2513; RV64-LABEL: cttz_zero_undef_nxv1i64: 2514; RV64: # %bb.0: 2515; RV64-NEXT: li a0, 1 2516; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu 2517; RV64-NEXT: vsub.vx v9, v8, a0 2518; RV64-NEXT: vnot.v v8, v8 2519; RV64-NEXT: vand.vv v8, v8, v9 2520; RV64-NEXT: lui a0, %hi(.LCPI40_0) 2521; RV64-NEXT: ld a0, %lo(.LCPI40_0)(a0) 2522; RV64-NEXT: lui a1, %hi(.LCPI40_1) 2523; RV64-NEXT: ld a1, %lo(.LCPI40_1)(a1) 2524; RV64-NEXT: vsrl.vi v9, v8, 1 2525; RV64-NEXT: vand.vx v9, v9, a0 2526; RV64-NEXT: vsub.vv v8, v8, v9 2527; RV64-NEXT: vand.vx v9, v8, a1 2528; RV64-NEXT: vsrl.vi v8, v8, 2 2529; RV64-NEXT: vand.vx v8, v8, a1 2530; RV64-NEXT: vadd.vv v8, v9, v8 2531; RV64-NEXT: lui a0, %hi(.LCPI40_2) 2532; RV64-NEXT: ld a0, %lo(.LCPI40_2)(a0) 2533; RV64-NEXT: lui a1, %hi(.LCPI40_3) 2534; RV64-NEXT: ld a1, %lo(.LCPI40_3)(a1) 2535; RV64-NEXT: vsrl.vi v9, v8, 4 2536; RV64-NEXT: vadd.vv v8, v8, v9 2537; RV64-NEXT: vand.vx v8, v8, a0 2538; RV64-NEXT: vmul.vx v8, v8, a1 2539; RV64-NEXT: li a0, 56 2540; RV64-NEXT: vsrl.vx v8, v8, a0 2541; RV64-NEXT: ret 2542 %a = call <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true) 2543 ret <vscale x 1 x i64> %a 2544} 2545 2546define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) { 2547; RV32-LABEL: cttz_zero_undef_nxv2i64: 2548; RV32: # %bb.0: 2549; RV32-NEXT: addi 
sp, sp, -16 2550; RV32-NEXT: .cfi_def_cfa_offset 16 2551; RV32-NEXT: lui a0, 349525 2552; RV32-NEXT: addi a0, a0, 1365 2553; RV32-NEXT: sw a0, 12(sp) 2554; RV32-NEXT: sw a0, 8(sp) 2555; RV32-NEXT: lui a0, 209715 2556; RV32-NEXT: addi a0, a0, 819 2557; RV32-NEXT: sw a0, 12(sp) 2558; RV32-NEXT: sw a0, 8(sp) 2559; RV32-NEXT: lui a0, 61681 2560; RV32-NEXT: addi a0, a0, -241 2561; RV32-NEXT: sw a0, 12(sp) 2562; RV32-NEXT: sw a0, 8(sp) 2563; RV32-NEXT: lui a0, 4112 2564; RV32-NEXT: addi a0, a0, 257 2565; RV32-NEXT: sw a0, 12(sp) 2566; RV32-NEXT: sw a0, 8(sp) 2567; RV32-NEXT: li a0, 1 2568; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu 2569; RV32-NEXT: vsub.vx v10, v8, a0 2570; RV32-NEXT: vnot.v v8, v8 2571; RV32-NEXT: addi a0, sp, 8 2572; RV32-NEXT: vlse64.v v12, (a0), zero 2573; RV32-NEXT: vand.vv v8, v8, v10 2574; RV32-NEXT: vlse64.v v10, (a0), zero 2575; RV32-NEXT: vsrl.vi v14, v8, 1 2576; RV32-NEXT: vand.vv v12, v14, v12 2577; RV32-NEXT: vsub.vv v8, v8, v12 2578; RV32-NEXT: vand.vv v12, v8, v10 2579; RV32-NEXT: vsrl.vi v8, v8, 2 2580; RV32-NEXT: vand.vv v8, v8, v10 2581; RV32-NEXT: vadd.vv v8, v12, v8 2582; RV32-NEXT: vlse64.v v10, (a0), zero 2583; RV32-NEXT: vlse64.v v12, (a0), zero 2584; RV32-NEXT: vsrl.vi v14, v8, 4 2585; RV32-NEXT: vadd.vv v8, v8, v14 2586; RV32-NEXT: vand.vv v8, v8, v10 2587; RV32-NEXT: vmul.vv v8, v8, v12 2588; RV32-NEXT: li a0, 56 2589; RV32-NEXT: vsrl.vx v8, v8, a0 2590; RV32-NEXT: addi sp, sp, 16 2591; RV32-NEXT: ret 2592; 2593; RV64-LABEL: cttz_zero_undef_nxv2i64: 2594; RV64: # %bb.0: 2595; RV64-NEXT: li a0, 1 2596; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu 2597; RV64-NEXT: vsub.vx v10, v8, a0 2598; RV64-NEXT: vnot.v v8, v8 2599; RV64-NEXT: vand.vv v8, v8, v10 2600; RV64-NEXT: lui a0, %hi(.LCPI41_0) 2601; RV64-NEXT: ld a0, %lo(.LCPI41_0)(a0) 2602; RV64-NEXT: lui a1, %hi(.LCPI41_1) 2603; RV64-NEXT: ld a1, %lo(.LCPI41_1)(a1) 2604; RV64-NEXT: vsrl.vi v10, v8, 1 2605; RV64-NEXT: vand.vx v10, v10, a0 2606; RV64-NEXT: vsub.vv v8, v8, v10 2607; 
RV64-NEXT: vand.vx v10, v8, a1 2608; RV64-NEXT: vsrl.vi v8, v8, 2 2609; RV64-NEXT: vand.vx v8, v8, a1 2610; RV64-NEXT: vadd.vv v8, v10, v8 2611; RV64-NEXT: lui a0, %hi(.LCPI41_2) 2612; RV64-NEXT: ld a0, %lo(.LCPI41_2)(a0) 2613; RV64-NEXT: lui a1, %hi(.LCPI41_3) 2614; RV64-NEXT: ld a1, %lo(.LCPI41_3)(a1) 2615; RV64-NEXT: vsrl.vi v10, v8, 4 2616; RV64-NEXT: vadd.vv v8, v8, v10 2617; RV64-NEXT: vand.vx v8, v8, a0 2618; RV64-NEXT: vmul.vx v8, v8, a1 2619; RV64-NEXT: li a0, 56 2620; RV64-NEXT: vsrl.vx v8, v8, a0 2621; RV64-NEXT: ret 2622 %a = call <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true) 2623 ret <vscale x 2 x i64> %a 2624} 2625 2626define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) { 2627; RV32-LABEL: cttz_zero_undef_nxv4i64: 2628; RV32: # %bb.0: 2629; RV32-NEXT: addi sp, sp, -16 2630; RV32-NEXT: .cfi_def_cfa_offset 16 2631; RV32-NEXT: lui a0, 349525 2632; RV32-NEXT: addi a0, a0, 1365 2633; RV32-NEXT: sw a0, 12(sp) 2634; RV32-NEXT: sw a0, 8(sp) 2635; RV32-NEXT: lui a0, 209715 2636; RV32-NEXT: addi a0, a0, 819 2637; RV32-NEXT: sw a0, 12(sp) 2638; RV32-NEXT: sw a0, 8(sp) 2639; RV32-NEXT: lui a0, 61681 2640; RV32-NEXT: addi a0, a0, -241 2641; RV32-NEXT: sw a0, 12(sp) 2642; RV32-NEXT: sw a0, 8(sp) 2643; RV32-NEXT: lui a0, 4112 2644; RV32-NEXT: addi a0, a0, 257 2645; RV32-NEXT: sw a0, 12(sp) 2646; RV32-NEXT: sw a0, 8(sp) 2647; RV32-NEXT: li a0, 1 2648; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, mu 2649; RV32-NEXT: vsub.vx v12, v8, a0 2650; RV32-NEXT: vnot.v v8, v8 2651; RV32-NEXT: addi a0, sp, 8 2652; RV32-NEXT: vlse64.v v16, (a0), zero 2653; RV32-NEXT: vand.vv v8, v8, v12 2654; RV32-NEXT: vlse64.v v12, (a0), zero 2655; RV32-NEXT: vsrl.vi v20, v8, 1 2656; RV32-NEXT: vand.vv v16, v20, v16 2657; RV32-NEXT: vsub.vv v8, v8, v16 2658; RV32-NEXT: vand.vv v16, v8, v12 2659; RV32-NEXT: vsrl.vi v8, v8, 2 2660; RV32-NEXT: vand.vv v8, v8, v12 2661; RV32-NEXT: vadd.vv v8, v16, v8 2662; RV32-NEXT: vlse64.v v12, (a0), zero 2663; 
RV32-NEXT: vlse64.v v16, (a0), zero 2664; RV32-NEXT: vsrl.vi v20, v8, 4 2665; RV32-NEXT: vadd.vv v8, v8, v20 2666; RV32-NEXT: vand.vv v8, v8, v12 2667; RV32-NEXT: vmul.vv v8, v8, v16 2668; RV32-NEXT: li a0, 56 2669; RV32-NEXT: vsrl.vx v8, v8, a0 2670; RV32-NEXT: addi sp, sp, 16 2671; RV32-NEXT: ret 2672; 2673; RV64-LABEL: cttz_zero_undef_nxv4i64: 2674; RV64: # %bb.0: 2675; RV64-NEXT: li a0, 1 2676; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu 2677; RV64-NEXT: vsub.vx v12, v8, a0 2678; RV64-NEXT: vnot.v v8, v8 2679; RV64-NEXT: vand.vv v8, v8, v12 2680; RV64-NEXT: lui a0, %hi(.LCPI42_0) 2681; RV64-NEXT: ld a0, %lo(.LCPI42_0)(a0) 2682; RV64-NEXT: lui a1, %hi(.LCPI42_1) 2683; RV64-NEXT: ld a1, %lo(.LCPI42_1)(a1) 2684; RV64-NEXT: vsrl.vi v12, v8, 1 2685; RV64-NEXT: vand.vx v12, v12, a0 2686; RV64-NEXT: vsub.vv v8, v8, v12 2687; RV64-NEXT: vand.vx v12, v8, a1 2688; RV64-NEXT: vsrl.vi v8, v8, 2 2689; RV64-NEXT: vand.vx v8, v8, a1 2690; RV64-NEXT: vadd.vv v8, v12, v8 2691; RV64-NEXT: lui a0, %hi(.LCPI42_2) 2692; RV64-NEXT: ld a0, %lo(.LCPI42_2)(a0) 2693; RV64-NEXT: lui a1, %hi(.LCPI42_3) 2694; RV64-NEXT: ld a1, %lo(.LCPI42_3)(a1) 2695; RV64-NEXT: vsrl.vi v12, v8, 4 2696; RV64-NEXT: vadd.vv v8, v8, v12 2697; RV64-NEXT: vand.vx v8, v8, a0 2698; RV64-NEXT: vmul.vx v8, v8, a1 2699; RV64-NEXT: li a0, 56 2700; RV64-NEXT: vsrl.vx v8, v8, a0 2701; RV64-NEXT: ret 2702 %a = call <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true) 2703 ret <vscale x 4 x i64> %a 2704} 2705 2706define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) { 2707; RV32-LABEL: cttz_zero_undef_nxv8i64: 2708; RV32: # %bb.0: 2709; RV32-NEXT: addi sp, sp, -16 2710; RV32-NEXT: .cfi_def_cfa_offset 16 2711; RV32-NEXT: lui a0, 349525 2712; RV32-NEXT: addi a0, a0, 1365 2713; RV32-NEXT: sw a0, 12(sp) 2714; RV32-NEXT: sw a0, 8(sp) 2715; RV32-NEXT: lui a0, 209715 2716; RV32-NEXT: addi a0, a0, 819 2717; RV32-NEXT: sw a0, 12(sp) 2718; RV32-NEXT: sw a0, 8(sp) 2719; RV32-NEXT: lui a0, 
61681 2720; RV32-NEXT: addi a0, a0, -241 2721; RV32-NEXT: sw a0, 12(sp) 2722; RV32-NEXT: sw a0, 8(sp) 2723; RV32-NEXT: lui a0, 4112 2724; RV32-NEXT: addi a0, a0, 257 2725; RV32-NEXT: sw a0, 12(sp) 2726; RV32-NEXT: sw a0, 8(sp) 2727; RV32-NEXT: li a0, 1 2728; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2729; RV32-NEXT: vsub.vx v16, v8, a0 2730; RV32-NEXT: vnot.v v8, v8 2731; RV32-NEXT: addi a0, sp, 8 2732; RV32-NEXT: vlse64.v v24, (a0), zero 2733; RV32-NEXT: vand.vv v8, v8, v16 2734; RV32-NEXT: vlse64.v v16, (a0), zero 2735; RV32-NEXT: vsrl.vi v0, v8, 1 2736; RV32-NEXT: vand.vv v24, v0, v24 2737; RV32-NEXT: vsub.vv v8, v8, v24 2738; RV32-NEXT: vand.vv v24, v8, v16 2739; RV32-NEXT: vsrl.vi v8, v8, 2 2740; RV32-NEXT: vand.vv v8, v8, v16 2741; RV32-NEXT: vadd.vv v8, v24, v8 2742; RV32-NEXT: vlse64.v v16, (a0), zero 2743; RV32-NEXT: vlse64.v v24, (a0), zero 2744; RV32-NEXT: vsrl.vi v0, v8, 4 2745; RV32-NEXT: vadd.vv v8, v8, v0 2746; RV32-NEXT: vand.vv v8, v8, v16 2747; RV32-NEXT: vmul.vv v8, v8, v24 2748; RV32-NEXT: li a0, 56 2749; RV32-NEXT: vsrl.vx v8, v8, a0 2750; RV32-NEXT: addi sp, sp, 16 2751; RV32-NEXT: ret 2752; 2753; RV64-LABEL: cttz_zero_undef_nxv8i64: 2754; RV64: # %bb.0: 2755; RV64-NEXT: li a0, 1 2756; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu 2757; RV64-NEXT: vsub.vx v16, v8, a0 2758; RV64-NEXT: vnot.v v8, v8 2759; RV64-NEXT: vand.vv v8, v8, v16 2760; RV64-NEXT: lui a0, %hi(.LCPI43_0) 2761; RV64-NEXT: ld a0, %lo(.LCPI43_0)(a0) 2762; RV64-NEXT: lui a1, %hi(.LCPI43_1) 2763; RV64-NEXT: ld a1, %lo(.LCPI43_1)(a1) 2764; RV64-NEXT: vsrl.vi v16, v8, 1 2765; RV64-NEXT: vand.vx v16, v16, a0 2766; RV64-NEXT: vsub.vv v8, v8, v16 2767; RV64-NEXT: vand.vx v16, v8, a1 2768; RV64-NEXT: vsrl.vi v8, v8, 2 2769; RV64-NEXT: vand.vx v8, v8, a1 2770; RV64-NEXT: vadd.vv v8, v16, v8 2771; RV64-NEXT: lui a0, %hi(.LCPI43_2) 2772; RV64-NEXT: ld a0, %lo(.LCPI43_2)(a0) 2773; RV64-NEXT: lui a1, %hi(.LCPI43_3) 2774; RV64-NEXT: ld a1, %lo(.LCPI43_3)(a1) 2775; RV64-NEXT: vsrl.vi v16, 
v8, 4 2776; RV64-NEXT: vadd.vv v8, v8, v16 2777; RV64-NEXT: vand.vx v8, v8, a0 2778; RV64-NEXT: vmul.vx v8, v8, a1 2779; RV64-NEXT: li a0, 56 2780; RV64-NEXT: vsrl.vx v8, v8, a0 2781; RV64-NEXT: ret 2782 %a = call <vscale x 8 x i64> @llvm.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true) 2783 ret <vscale x 8 x i64> %a 2784} 2785