; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64

; Codegen test for the llvm.ctlz intrinsic (called with its i1 operand set to
; false) on scalable-vector types, built for riscv32 and riscv64 with either
; +zve64x or +v,+d.
;
; The +zve64x expectations are an integer-only sequence: a shift/or cascade,
; vnot, then a series of mask/shift/add steps. For the narrower types the
; +v,+d expectations instead convert the input with vfcvt.f.xu.v or
; vfwcvt.f.xu.v, shift the result right, and subtract it from a constant with
; vrsub; vmseq/vmerge then select the element width for lanes whose input is 0.
;
; The assertion lines below are generated output. Regenerate them with the
; script named on the first line rather than editing them by hand.

; ---- i8 elements ----

define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv1i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)

define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv2i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)

define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv4i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v10, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)

define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv8i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv8i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v12, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v12, v12
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)

define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv16i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv16i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v16, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v10, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8>, i1)

; For nxv32i8 and nxv64i8 a single expected sequence is shared by all four
; configurations (only the common prefix is used).

define <vscale x 32 x i8> @ctlz_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: ctlz_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 false)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8>, i1)

define <vscale x 64 x i8> @ctlz_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: ctlz_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 false)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)

; ---- i16 elements ----
; The riscv32 and riscv64 integer-only sequences differ only in how the mask
; constants are materialized (addi vs. addiw).

define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: ctlz_nxv1i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv1i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)

define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: ctlz_nxv2i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv2i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)

define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: ctlz_nxv4i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv4i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)

define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: ctlz_nxv8i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv8i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv8i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)

define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: ctlz_nxv16i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv16i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv16i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v12, v12, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)

; For nxv32i16 the expectations are keyed only on the target width (the RV32
; and RV64 prefixes), so the +zve64x and +v,+d runs share them.

define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: ctlz_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)

; ---- i32 elements ----
; In the +v,+d expectations the input is widened with vfwcvt.f.xu.v, shifted
; right by 52 at e64, narrowed back to e32 and subtracted from 1054.

define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: ctlz_nxv1i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv1i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT:    vsrl.vx v9, v9, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)

define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: ctlz_nxv2i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv2i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT:    vsrl.vx v10, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)

define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
; RV32I-LABEL: ctlz_nxv4i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv4i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-D-NEXT:    vsrl.vx v12, v12, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)

define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
;
RV32I-LABEL: ctlz_nxv8i32: 1052; RV32I: # %bb.0: 1053; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1054; RV32I-NEXT: vsrl.vi v12, v8, 1 1055; RV32I-NEXT: vor.vv v8, v8, v12 1056; RV32I-NEXT: vsrl.vi v12, v8, 2 1057; RV32I-NEXT: vor.vv v8, v8, v12 1058; RV32I-NEXT: vsrl.vi v12, v8, 4 1059; RV32I-NEXT: vor.vv v8, v8, v12 1060; RV32I-NEXT: vsrl.vi v12, v8, 8 1061; RV32I-NEXT: vor.vv v8, v8, v12 1062; RV32I-NEXT: vsrl.vi v12, v8, 16 1063; RV32I-NEXT: vor.vv v8, v8, v12 1064; RV32I-NEXT: vnot.v v8, v8 1065; RV32I-NEXT: vsrl.vi v12, v8, 1 1066; RV32I-NEXT: lui a0, 349525 1067; RV32I-NEXT: addi a0, a0, 1365 1068; RV32I-NEXT: vand.vx v12, v12, a0 1069; RV32I-NEXT: vsub.vv v8, v8, v12 1070; RV32I-NEXT: lui a0, 209715 1071; RV32I-NEXT: addi a0, a0, 819 1072; RV32I-NEXT: vand.vx v12, v8, a0 1073; RV32I-NEXT: vsrl.vi v8, v8, 2 1074; RV32I-NEXT: vand.vx v8, v8, a0 1075; RV32I-NEXT: vadd.vv v8, v12, v8 1076; RV32I-NEXT: vsrl.vi v12, v8, 4 1077; RV32I-NEXT: vadd.vv v8, v8, v12 1078; RV32I-NEXT: lui a0, 61681 1079; RV32I-NEXT: addi a0, a0, -241 1080; RV32I-NEXT: vand.vx v8, v8, a0 1081; RV32I-NEXT: lui a0, 4112 1082; RV32I-NEXT: addi a0, a0, 257 1083; RV32I-NEXT: vmul.vx v8, v8, a0 1084; RV32I-NEXT: vsrl.vi v8, v8, 24 1085; RV32I-NEXT: ret 1086; 1087; RV64I-LABEL: ctlz_nxv8i32: 1088; RV64I: # %bb.0: 1089; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1090; RV64I-NEXT: vsrl.vi v12, v8, 1 1091; RV64I-NEXT: vor.vv v8, v8, v12 1092; RV64I-NEXT: vsrl.vi v12, v8, 2 1093; RV64I-NEXT: vor.vv v8, v8, v12 1094; RV64I-NEXT: vsrl.vi v12, v8, 4 1095; RV64I-NEXT: vor.vv v8, v8, v12 1096; RV64I-NEXT: vsrl.vi v12, v8, 8 1097; RV64I-NEXT: vor.vv v8, v8, v12 1098; RV64I-NEXT: vsrl.vi v12, v8, 16 1099; RV64I-NEXT: vor.vv v8, v8, v12 1100; RV64I-NEXT: vnot.v v8, v8 1101; RV64I-NEXT: vsrl.vi v12, v8, 1 1102; RV64I-NEXT: lui a0, 349525 1103; RV64I-NEXT: addiw a0, a0, 1365 1104; RV64I-NEXT: vand.vx v12, v12, a0 1105; RV64I-NEXT: vsub.vv v8, v8, v12 1106; RV64I-NEXT: lui a0, 209715 1107; RV64I-NEXT: 
addiw a0, a0, 819 1108; RV64I-NEXT: vand.vx v12, v8, a0 1109; RV64I-NEXT: vsrl.vi v8, v8, 2 1110; RV64I-NEXT: vand.vx v8, v8, a0 1111; RV64I-NEXT: vadd.vv v8, v12, v8 1112; RV64I-NEXT: vsrl.vi v12, v8, 4 1113; RV64I-NEXT: vadd.vv v8, v8, v12 1114; RV64I-NEXT: lui a0, 61681 1115; RV64I-NEXT: addiw a0, a0, -241 1116; RV64I-NEXT: vand.vx v8, v8, a0 1117; RV64I-NEXT: lui a0, 4112 1118; RV64I-NEXT: addiw a0, a0, 257 1119; RV64I-NEXT: vmul.vx v8, v8, a0 1120; RV64I-NEXT: vsrl.vi v8, v8, 24 1121; RV64I-NEXT: ret 1122; 1123; CHECK-D-LABEL: ctlz_nxv8i32: 1124; CHECK-D: # %bb.0: 1125; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1126; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 1127; CHECK-D-NEXT: li a0, 52 1128; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu 1129; CHECK-D-NEXT: vsrl.vx v16, v16, a0 1130; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu 1131; CHECK-D-NEXT: vncvt.x.x.w v12, v16 1132; CHECK-D-NEXT: li a0, 1054 1133; CHECK-D-NEXT: vrsub.vx v12, v12, a0 1134; CHECK-D-NEXT: vmseq.vi v0, v8, 0 1135; CHECK-D-NEXT: li a0, 32 1136; CHECK-D-NEXT: vmerge.vxm v8, v12, a0, v0 1137; CHECK-D-NEXT: ret 1138 %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false) 1139 ret <vscale x 8 x i32> %a 1140} 1141declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1) 1142 1143define <vscale x 16 x i32> @ctlz_nxv16i32(<vscale x 16 x i32> %va) { 1144; RV32-LABEL: ctlz_nxv16i32: 1145; RV32: # %bb.0: 1146; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, mu 1147; RV32-NEXT: vsrl.vi v16, v8, 1 1148; RV32-NEXT: vor.vv v8, v8, v16 1149; RV32-NEXT: vsrl.vi v16, v8, 2 1150; RV32-NEXT: vor.vv v8, v8, v16 1151; RV32-NEXT: vsrl.vi v16, v8, 4 1152; RV32-NEXT: vor.vv v8, v8, v16 1153; RV32-NEXT: vsrl.vi v16, v8, 8 1154; RV32-NEXT: vor.vv v8, v8, v16 1155; RV32-NEXT: vsrl.vi v16, v8, 16 1156; RV32-NEXT: vor.vv v8, v8, v16 1157; RV32-NEXT: vnot.v v8, v8 1158; RV32-NEXT: vsrl.vi v16, v8, 1 1159; RV32-NEXT: lui a0, 349525 1160; RV32-NEXT: addi a0, a0, 1365 1161; 
RV32-NEXT: vand.vx v16, v16, a0 1162; RV32-NEXT: vsub.vv v8, v8, v16 1163; RV32-NEXT: lui a0, 209715 1164; RV32-NEXT: addi a0, a0, 819 1165; RV32-NEXT: vand.vx v16, v8, a0 1166; RV32-NEXT: vsrl.vi v8, v8, 2 1167; RV32-NEXT: vand.vx v8, v8, a0 1168; RV32-NEXT: vadd.vv v8, v16, v8 1169; RV32-NEXT: vsrl.vi v16, v8, 4 1170; RV32-NEXT: vadd.vv v8, v8, v16 1171; RV32-NEXT: lui a0, 61681 1172; RV32-NEXT: addi a0, a0, -241 1173; RV32-NEXT: vand.vx v8, v8, a0 1174; RV32-NEXT: lui a0, 4112 1175; RV32-NEXT: addi a0, a0, 257 1176; RV32-NEXT: vmul.vx v8, v8, a0 1177; RV32-NEXT: vsrl.vi v8, v8, 24 1178; RV32-NEXT: ret 1179; 1180; RV64-LABEL: ctlz_nxv16i32: 1181; RV64: # %bb.0: 1182; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, mu 1183; RV64-NEXT: vsrl.vi v16, v8, 1 1184; RV64-NEXT: vor.vv v8, v8, v16 1185; RV64-NEXT: vsrl.vi v16, v8, 2 1186; RV64-NEXT: vor.vv v8, v8, v16 1187; RV64-NEXT: vsrl.vi v16, v8, 4 1188; RV64-NEXT: vor.vv v8, v8, v16 1189; RV64-NEXT: vsrl.vi v16, v8, 8 1190; RV64-NEXT: vor.vv v8, v8, v16 1191; RV64-NEXT: vsrl.vi v16, v8, 16 1192; RV64-NEXT: vor.vv v8, v8, v16 1193; RV64-NEXT: vnot.v v8, v8 1194; RV64-NEXT: vsrl.vi v16, v8, 1 1195; RV64-NEXT: lui a0, 349525 1196; RV64-NEXT: addiw a0, a0, 1365 1197; RV64-NEXT: vand.vx v16, v16, a0 1198; RV64-NEXT: vsub.vv v8, v8, v16 1199; RV64-NEXT: lui a0, 209715 1200; RV64-NEXT: addiw a0, a0, 819 1201; RV64-NEXT: vand.vx v16, v8, a0 1202; RV64-NEXT: vsrl.vi v8, v8, 2 1203; RV64-NEXT: vand.vx v8, v8, a0 1204; RV64-NEXT: vadd.vv v8, v16, v8 1205; RV64-NEXT: vsrl.vi v16, v8, 4 1206; RV64-NEXT: vadd.vv v8, v8, v16 1207; RV64-NEXT: lui a0, 61681 1208; RV64-NEXT: addiw a0, a0, -241 1209; RV64-NEXT: vand.vx v8, v8, a0 1210; RV64-NEXT: lui a0, 4112 1211; RV64-NEXT: addiw a0, a0, 257 1212; RV64-NEXT: vmul.vx v8, v8, a0 1213; RV64-NEXT: vsrl.vi v8, v8, 24 1214; RV64-NEXT: ret 1215 %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false) 1216 ret <vscale x 16 x i32> %a 1217} 1218declare <vscale x 16 x i32> 
@llvm.ctlz.nxv16i32(<vscale x 16 x i32>, i1) 1219 1220define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) { 1221; RV32-LABEL: ctlz_nxv1i64: 1222; RV32: # %bb.0: 1223; RV32-NEXT: addi sp, sp, -16 1224; RV32-NEXT: .cfi_def_cfa_offset 16 1225; RV32-NEXT: lui a0, 349525 1226; RV32-NEXT: addi a0, a0, 1365 1227; RV32-NEXT: sw a0, 12(sp) 1228; RV32-NEXT: sw a0, 8(sp) 1229; RV32-NEXT: lui a0, 209715 1230; RV32-NEXT: addi a0, a0, 819 1231; RV32-NEXT: sw a0, 12(sp) 1232; RV32-NEXT: sw a0, 8(sp) 1233; RV32-NEXT: lui a0, 61681 1234; RV32-NEXT: addi a0, a0, -241 1235; RV32-NEXT: sw a0, 12(sp) 1236; RV32-NEXT: sw a0, 8(sp) 1237; RV32-NEXT: lui a0, 4112 1238; RV32-NEXT: addi a0, a0, 257 1239; RV32-NEXT: sw a0, 12(sp) 1240; RV32-NEXT: sw a0, 8(sp) 1241; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1242; RV32-NEXT: vsrl.vi v9, v8, 1 1243; RV32-NEXT: vor.vv v8, v8, v9 1244; RV32-NEXT: vsrl.vi v9, v8, 2 1245; RV32-NEXT: vor.vv v8, v8, v9 1246; RV32-NEXT: vsrl.vi v9, v8, 4 1247; RV32-NEXT: vor.vv v8, v8, v9 1248; RV32-NEXT: vsrl.vi v9, v8, 8 1249; RV32-NEXT: vor.vv v8, v8, v9 1250; RV32-NEXT: vsrl.vi v9, v8, 16 1251; RV32-NEXT: vor.vv v8, v8, v9 1252; RV32-NEXT: li a0, 32 1253; RV32-NEXT: vsrl.vx v9, v8, a0 1254; RV32-NEXT: vor.vv v8, v8, v9 1255; RV32-NEXT: addi a0, sp, 8 1256; RV32-NEXT: vlse64.v v9, (a0), zero 1257; RV32-NEXT: vnot.v v8, v8 1258; RV32-NEXT: vlse64.v v10, (a0), zero 1259; RV32-NEXT: vsrl.vi v11, v8, 1 1260; RV32-NEXT: vand.vv v9, v11, v9 1261; RV32-NEXT: vsub.vv v8, v8, v9 1262; RV32-NEXT: vand.vv v9, v8, v10 1263; RV32-NEXT: vsrl.vi v8, v8, 2 1264; RV32-NEXT: vand.vv v8, v8, v10 1265; RV32-NEXT: vadd.vv v8, v9, v8 1266; RV32-NEXT: vlse64.v v9, (a0), zero 1267; RV32-NEXT: vlse64.v v10, (a0), zero 1268; RV32-NEXT: vsrl.vi v11, v8, 4 1269; RV32-NEXT: vadd.vv v8, v8, v11 1270; RV32-NEXT: vand.vv v8, v8, v9 1271; RV32-NEXT: vmul.vv v8, v8, v10 1272; RV32-NEXT: li a0, 56 1273; RV32-NEXT: vsrl.vx v8, v8, a0 1274; RV32-NEXT: addi sp, sp, 16 1275; RV32-NEXT: ret 
1276; 1277; RV64-LABEL: ctlz_nxv1i64: 1278; RV64: # %bb.0: 1279; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, mu 1280; RV64-NEXT: vsrl.vi v9, v8, 1 1281; RV64-NEXT: vor.vv v8, v8, v9 1282; RV64-NEXT: vsrl.vi v9, v8, 2 1283; RV64-NEXT: vor.vv v8, v8, v9 1284; RV64-NEXT: vsrl.vi v9, v8, 4 1285; RV64-NEXT: vor.vv v8, v8, v9 1286; RV64-NEXT: vsrl.vi v9, v8, 8 1287; RV64-NEXT: vor.vv v8, v8, v9 1288; RV64-NEXT: vsrl.vi v9, v8, 16 1289; RV64-NEXT: vor.vv v8, v8, v9 1290; RV64-NEXT: li a0, 32 1291; RV64-NEXT: vsrl.vx v9, v8, a0 1292; RV64-NEXT: vor.vv v8, v8, v9 1293; RV64-NEXT: vnot.v v8, v8 1294; RV64-NEXT: lui a0, %hi(.LCPI18_0) 1295; RV64-NEXT: ld a0, %lo(.LCPI18_0)(a0) 1296; RV64-NEXT: lui a1, %hi(.LCPI18_1) 1297; RV64-NEXT: ld a1, %lo(.LCPI18_1)(a1) 1298; RV64-NEXT: vsrl.vi v9, v8, 1 1299; RV64-NEXT: vand.vx v9, v9, a0 1300; RV64-NEXT: vsub.vv v8, v8, v9 1301; RV64-NEXT: vand.vx v9, v8, a1 1302; RV64-NEXT: vsrl.vi v8, v8, 2 1303; RV64-NEXT: vand.vx v8, v8, a1 1304; RV64-NEXT: vadd.vv v8, v9, v8 1305; RV64-NEXT: lui a0, %hi(.LCPI18_2) 1306; RV64-NEXT: ld a0, %lo(.LCPI18_2)(a0) 1307; RV64-NEXT: lui a1, %hi(.LCPI18_3) 1308; RV64-NEXT: ld a1, %lo(.LCPI18_3)(a1) 1309; RV64-NEXT: vsrl.vi v9, v8, 4 1310; RV64-NEXT: vadd.vv v8, v8, v9 1311; RV64-NEXT: vand.vx v8, v8, a0 1312; RV64-NEXT: vmul.vx v8, v8, a1 1313; RV64-NEXT: li a0, 56 1314; RV64-NEXT: vsrl.vx v8, v8, a0 1315; RV64-NEXT: ret 1316 %a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false) 1317 ret <vscale x 1 x i64> %a 1318} 1319declare <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64>, i1) 1320 1321define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) { 1322; RV32-LABEL: ctlz_nxv2i64: 1323; RV32: # %bb.0: 1324; RV32-NEXT: addi sp, sp, -16 1325; RV32-NEXT: .cfi_def_cfa_offset 16 1326; RV32-NEXT: lui a0, 349525 1327; RV32-NEXT: addi a0, a0, 1365 1328; RV32-NEXT: sw a0, 12(sp) 1329; RV32-NEXT: sw a0, 8(sp) 1330; RV32-NEXT: lui a0, 209715 1331; RV32-NEXT: addi a0, a0, 819 1332; 
RV32-NEXT: sw a0, 12(sp) 1333; RV32-NEXT: sw a0, 8(sp) 1334; RV32-NEXT: lui a0, 61681 1335; RV32-NEXT: addi a0, a0, -241 1336; RV32-NEXT: sw a0, 12(sp) 1337; RV32-NEXT: sw a0, 8(sp) 1338; RV32-NEXT: lui a0, 4112 1339; RV32-NEXT: addi a0, a0, 257 1340; RV32-NEXT: sw a0, 12(sp) 1341; RV32-NEXT: sw a0, 8(sp) 1342; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1343; RV32-NEXT: vsrl.vi v10, v8, 1 1344; RV32-NEXT: vor.vv v8, v8, v10 1345; RV32-NEXT: vsrl.vi v10, v8, 2 1346; RV32-NEXT: vor.vv v8, v8, v10 1347; RV32-NEXT: vsrl.vi v10, v8, 4 1348; RV32-NEXT: vor.vv v8, v8, v10 1349; RV32-NEXT: vsrl.vi v10, v8, 8 1350; RV32-NEXT: vor.vv v8, v8, v10 1351; RV32-NEXT: vsrl.vi v10, v8, 16 1352; RV32-NEXT: vor.vv v8, v8, v10 1353; RV32-NEXT: li a0, 32 1354; RV32-NEXT: vsrl.vx v10, v8, a0 1355; RV32-NEXT: vor.vv v8, v8, v10 1356; RV32-NEXT: addi a0, sp, 8 1357; RV32-NEXT: vlse64.v v10, (a0), zero 1358; RV32-NEXT: vnot.v v8, v8 1359; RV32-NEXT: vlse64.v v12, (a0), zero 1360; RV32-NEXT: vsrl.vi v14, v8, 1 1361; RV32-NEXT: vand.vv v10, v14, v10 1362; RV32-NEXT: vsub.vv v8, v8, v10 1363; RV32-NEXT: vand.vv v10, v8, v12 1364; RV32-NEXT: vsrl.vi v8, v8, 2 1365; RV32-NEXT: vand.vv v8, v8, v12 1366; RV32-NEXT: vadd.vv v8, v10, v8 1367; RV32-NEXT: vlse64.v v10, (a0), zero 1368; RV32-NEXT: vlse64.v v12, (a0), zero 1369; RV32-NEXT: vsrl.vi v14, v8, 4 1370; RV32-NEXT: vadd.vv v8, v8, v14 1371; RV32-NEXT: vand.vv v8, v8, v10 1372; RV32-NEXT: vmul.vv v8, v8, v12 1373; RV32-NEXT: li a0, 56 1374; RV32-NEXT: vsrl.vx v8, v8, a0 1375; RV32-NEXT: addi sp, sp, 16 1376; RV32-NEXT: ret 1377; 1378; RV64-LABEL: ctlz_nxv2i64: 1379; RV64: # %bb.0: 1380; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, mu 1381; RV64-NEXT: vsrl.vi v10, v8, 1 1382; RV64-NEXT: vor.vv v8, v8, v10 1383; RV64-NEXT: vsrl.vi v10, v8, 2 1384; RV64-NEXT: vor.vv v8, v8, v10 1385; RV64-NEXT: vsrl.vi v10, v8, 4 1386; RV64-NEXT: vor.vv v8, v8, v10 1387; RV64-NEXT: vsrl.vi v10, v8, 8 1388; RV64-NEXT: vor.vv v8, v8, v10 1389; RV64-NEXT: vsrl.vi 
v10, v8, 16 1390; RV64-NEXT: vor.vv v8, v8, v10 1391; RV64-NEXT: li a0, 32 1392; RV64-NEXT: vsrl.vx v10, v8, a0 1393; RV64-NEXT: vor.vv v8, v8, v10 1394; RV64-NEXT: vnot.v v8, v8 1395; RV64-NEXT: lui a0, %hi(.LCPI19_0) 1396; RV64-NEXT: ld a0, %lo(.LCPI19_0)(a0) 1397; RV64-NEXT: lui a1, %hi(.LCPI19_1) 1398; RV64-NEXT: ld a1, %lo(.LCPI19_1)(a1) 1399; RV64-NEXT: vsrl.vi v10, v8, 1 1400; RV64-NEXT: vand.vx v10, v10, a0 1401; RV64-NEXT: vsub.vv v8, v8, v10 1402; RV64-NEXT: vand.vx v10, v8, a1 1403; RV64-NEXT: vsrl.vi v8, v8, 2 1404; RV64-NEXT: vand.vx v8, v8, a1 1405; RV64-NEXT: vadd.vv v8, v10, v8 1406; RV64-NEXT: lui a0, %hi(.LCPI19_2) 1407; RV64-NEXT: ld a0, %lo(.LCPI19_2)(a0) 1408; RV64-NEXT: lui a1, %hi(.LCPI19_3) 1409; RV64-NEXT: ld a1, %lo(.LCPI19_3)(a1) 1410; RV64-NEXT: vsrl.vi v10, v8, 4 1411; RV64-NEXT: vadd.vv v8, v8, v10 1412; RV64-NEXT: vand.vx v8, v8, a0 1413; RV64-NEXT: vmul.vx v8, v8, a1 1414; RV64-NEXT: li a0, 56 1415; RV64-NEXT: vsrl.vx v8, v8, a0 1416; RV64-NEXT: ret 1417 %a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false) 1418 ret <vscale x 2 x i64> %a 1419} 1420declare <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64>, i1) 1421 1422define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) { 1423; RV32-LABEL: ctlz_nxv4i64: 1424; RV32: # %bb.0: 1425; RV32-NEXT: addi sp, sp, -16 1426; RV32-NEXT: .cfi_def_cfa_offset 16 1427; RV32-NEXT: lui a0, 349525 1428; RV32-NEXT: addi a0, a0, 1365 1429; RV32-NEXT: sw a0, 12(sp) 1430; RV32-NEXT: sw a0, 8(sp) 1431; RV32-NEXT: lui a0, 209715 1432; RV32-NEXT: addi a0, a0, 819 1433; RV32-NEXT: sw a0, 12(sp) 1434; RV32-NEXT: sw a0, 8(sp) 1435; RV32-NEXT: lui a0, 61681 1436; RV32-NEXT: addi a0, a0, -241 1437; RV32-NEXT: sw a0, 12(sp) 1438; RV32-NEXT: sw a0, 8(sp) 1439; RV32-NEXT: lui a0, 4112 1440; RV32-NEXT: addi a0, a0, 257 1441; RV32-NEXT: sw a0, 12(sp) 1442; RV32-NEXT: sw a0, 8(sp) 1443; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1444; RV32-NEXT: vsrl.vi v12, v8, 1 1445; 
RV32-NEXT: vor.vv v8, v8, v12 1446; RV32-NEXT: vsrl.vi v12, v8, 2 1447; RV32-NEXT: vor.vv v8, v8, v12 1448; RV32-NEXT: vsrl.vi v12, v8, 4 1449; RV32-NEXT: vor.vv v8, v8, v12 1450; RV32-NEXT: vsrl.vi v12, v8, 8 1451; RV32-NEXT: vor.vv v8, v8, v12 1452; RV32-NEXT: vsrl.vi v12, v8, 16 1453; RV32-NEXT: vor.vv v8, v8, v12 1454; RV32-NEXT: li a0, 32 1455; RV32-NEXT: vsrl.vx v12, v8, a0 1456; RV32-NEXT: vor.vv v8, v8, v12 1457; RV32-NEXT: addi a0, sp, 8 1458; RV32-NEXT: vlse64.v v12, (a0), zero 1459; RV32-NEXT: vnot.v v8, v8 1460; RV32-NEXT: vlse64.v v16, (a0), zero 1461; RV32-NEXT: vsrl.vi v20, v8, 1 1462; RV32-NEXT: vand.vv v12, v20, v12 1463; RV32-NEXT: vsub.vv v8, v8, v12 1464; RV32-NEXT: vand.vv v12, v8, v16 1465; RV32-NEXT: vsrl.vi v8, v8, 2 1466; RV32-NEXT: vand.vv v8, v8, v16 1467; RV32-NEXT: vadd.vv v8, v12, v8 1468; RV32-NEXT: vlse64.v v12, (a0), zero 1469; RV32-NEXT: vlse64.v v16, (a0), zero 1470; RV32-NEXT: vsrl.vi v20, v8, 4 1471; RV32-NEXT: vadd.vv v8, v8, v20 1472; RV32-NEXT: vand.vv v8, v8, v12 1473; RV32-NEXT: vmul.vv v8, v8, v16 1474; RV32-NEXT: li a0, 56 1475; RV32-NEXT: vsrl.vx v8, v8, a0 1476; RV32-NEXT: addi sp, sp, 16 1477; RV32-NEXT: ret 1478; 1479; RV64-LABEL: ctlz_nxv4i64: 1480; RV64: # %bb.0: 1481; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, mu 1482; RV64-NEXT: vsrl.vi v12, v8, 1 1483; RV64-NEXT: vor.vv v8, v8, v12 1484; RV64-NEXT: vsrl.vi v12, v8, 2 1485; RV64-NEXT: vor.vv v8, v8, v12 1486; RV64-NEXT: vsrl.vi v12, v8, 4 1487; RV64-NEXT: vor.vv v8, v8, v12 1488; RV64-NEXT: vsrl.vi v12, v8, 8 1489; RV64-NEXT: vor.vv v8, v8, v12 1490; RV64-NEXT: vsrl.vi v12, v8, 16 1491; RV64-NEXT: vor.vv v8, v8, v12 1492; RV64-NEXT: li a0, 32 1493; RV64-NEXT: vsrl.vx v12, v8, a0 1494; RV64-NEXT: vor.vv v8, v8, v12 1495; RV64-NEXT: vnot.v v8, v8 1496; RV64-NEXT: lui a0, %hi(.LCPI20_0) 1497; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) 1498; RV64-NEXT: lui a1, %hi(.LCPI20_1) 1499; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) 1500; RV64-NEXT: vsrl.vi v12, v8, 1 1501; RV64-NEXT: 
vand.vx v12, v12, a0 1502; RV64-NEXT: vsub.vv v8, v8, v12 1503; RV64-NEXT: vand.vx v12, v8, a1 1504; RV64-NEXT: vsrl.vi v8, v8, 2 1505; RV64-NEXT: vand.vx v8, v8, a1 1506; RV64-NEXT: vadd.vv v8, v12, v8 1507; RV64-NEXT: lui a0, %hi(.LCPI20_2) 1508; RV64-NEXT: ld a0, %lo(.LCPI20_2)(a0) 1509; RV64-NEXT: lui a1, %hi(.LCPI20_3) 1510; RV64-NEXT: ld a1, %lo(.LCPI20_3)(a1) 1511; RV64-NEXT: vsrl.vi v12, v8, 4 1512; RV64-NEXT: vadd.vv v8, v8, v12 1513; RV64-NEXT: vand.vx v8, v8, a0 1514; RV64-NEXT: vmul.vx v8, v8, a1 1515; RV64-NEXT: li a0, 56 1516; RV64-NEXT: vsrl.vx v8, v8, a0 1517; RV64-NEXT: ret 1518 %a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false) 1519 ret <vscale x 4 x i64> %a 1520} 1521declare <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64>, i1) 1522 1523define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) { 1524; RV32-LABEL: ctlz_nxv8i64: 1525; RV32: # %bb.0: 1526; RV32-NEXT: addi sp, sp, -16 1527; RV32-NEXT: .cfi_def_cfa_offset 16 1528; RV32-NEXT: lui a0, 349525 1529; RV32-NEXT: addi a0, a0, 1365 1530; RV32-NEXT: sw a0, 12(sp) 1531; RV32-NEXT: sw a0, 8(sp) 1532; RV32-NEXT: lui a0, 209715 1533; RV32-NEXT: addi a0, a0, 819 1534; RV32-NEXT: sw a0, 12(sp) 1535; RV32-NEXT: sw a0, 8(sp) 1536; RV32-NEXT: lui a0, 61681 1537; RV32-NEXT: addi a0, a0, -241 1538; RV32-NEXT: sw a0, 12(sp) 1539; RV32-NEXT: sw a0, 8(sp) 1540; RV32-NEXT: lui a0, 4112 1541; RV32-NEXT: addi a0, a0, 257 1542; RV32-NEXT: sw a0, 12(sp) 1543; RV32-NEXT: sw a0, 8(sp) 1544; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1545; RV32-NEXT: vsrl.vi v16, v8, 1 1546; RV32-NEXT: vor.vv v8, v8, v16 1547; RV32-NEXT: vsrl.vi v16, v8, 2 1548; RV32-NEXT: vor.vv v8, v8, v16 1549; RV32-NEXT: vsrl.vi v16, v8, 4 1550; RV32-NEXT: vor.vv v8, v8, v16 1551; RV32-NEXT: vsrl.vi v16, v8, 8 1552; RV32-NEXT: vor.vv v8, v8, v16 1553; RV32-NEXT: vsrl.vi v16, v8, 16 1554; RV32-NEXT: vor.vv v8, v8, v16 1555; RV32-NEXT: li a0, 32 1556; RV32-NEXT: vsrl.vx v16, v8, a0 1557; RV32-NEXT: 
vor.vv v8, v8, v16 1558; RV32-NEXT: addi a0, sp, 8 1559; RV32-NEXT: vlse64.v v16, (a0), zero 1560; RV32-NEXT: vnot.v v8, v8 1561; RV32-NEXT: vlse64.v v24, (a0), zero 1562; RV32-NEXT: vsrl.vi v0, v8, 1 1563; RV32-NEXT: vand.vv v16, v0, v16 1564; RV32-NEXT: vsub.vv v8, v8, v16 1565; RV32-NEXT: vand.vv v16, v8, v24 1566; RV32-NEXT: vsrl.vi v8, v8, 2 1567; RV32-NEXT: vand.vv v8, v8, v24 1568; RV32-NEXT: vadd.vv v8, v16, v8 1569; RV32-NEXT: vlse64.v v16, (a0), zero 1570; RV32-NEXT: vlse64.v v24, (a0), zero 1571; RV32-NEXT: vsrl.vi v0, v8, 4 1572; RV32-NEXT: vadd.vv v8, v8, v0 1573; RV32-NEXT: vand.vv v8, v8, v16 1574; RV32-NEXT: vmul.vv v8, v8, v24 1575; RV32-NEXT: li a0, 56 1576; RV32-NEXT: vsrl.vx v8, v8, a0 1577; RV32-NEXT: addi sp, sp, 16 1578; RV32-NEXT: ret 1579; 1580; RV64-LABEL: ctlz_nxv8i64: 1581; RV64: # %bb.0: 1582; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu 1583; RV64-NEXT: vsrl.vi v16, v8, 1 1584; RV64-NEXT: vor.vv v8, v8, v16 1585; RV64-NEXT: vsrl.vi v16, v8, 2 1586; RV64-NEXT: vor.vv v8, v8, v16 1587; RV64-NEXT: vsrl.vi v16, v8, 4 1588; RV64-NEXT: vor.vv v8, v8, v16 1589; RV64-NEXT: vsrl.vi v16, v8, 8 1590; RV64-NEXT: vor.vv v8, v8, v16 1591; RV64-NEXT: vsrl.vi v16, v8, 16 1592; RV64-NEXT: vor.vv v8, v8, v16 1593; RV64-NEXT: li a0, 32 1594; RV64-NEXT: vsrl.vx v16, v8, a0 1595; RV64-NEXT: vor.vv v8, v8, v16 1596; RV64-NEXT: vnot.v v8, v8 1597; RV64-NEXT: lui a0, %hi(.LCPI21_0) 1598; RV64-NEXT: ld a0, %lo(.LCPI21_0)(a0) 1599; RV64-NEXT: lui a1, %hi(.LCPI21_1) 1600; RV64-NEXT: ld a1, %lo(.LCPI21_1)(a1) 1601; RV64-NEXT: vsrl.vi v16, v8, 1 1602; RV64-NEXT: vand.vx v16, v16, a0 1603; RV64-NEXT: vsub.vv v8, v8, v16 1604; RV64-NEXT: vand.vx v16, v8, a1 1605; RV64-NEXT: vsrl.vi v8, v8, 2 1606; RV64-NEXT: vand.vx v8, v8, a1 1607; RV64-NEXT: vadd.vv v8, v16, v8 1608; RV64-NEXT: lui a0, %hi(.LCPI21_2) 1609; RV64-NEXT: ld a0, %lo(.LCPI21_2)(a0) 1610; RV64-NEXT: lui a1, %hi(.LCPI21_3) 1611; RV64-NEXT: ld a1, %lo(.LCPI21_3)(a1) 1612; RV64-NEXT: vsrl.vi v16, v8, 4 
1613; RV64-NEXT: vadd.vv v8, v8, v16 1614; RV64-NEXT: vand.vx v8, v8, a0 1615; RV64-NEXT: vmul.vx v8, v8, a1 1616; RV64-NEXT: li a0, 56 1617; RV64-NEXT: vsrl.vx v8, v8, a0 1618; RV64-NEXT: ret 1619 %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false) 1620 ret <vscale x 8 x i64> %a 1621} 1622declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1) 1623 1624define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) { 1625; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i8: 1626; CHECK-ZVE64X: # %bb.0: 1627; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, mu 1628; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1629; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1630; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 1631; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1632; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1633; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1634; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1635; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1636; CHECK-ZVE64X-NEXT: li a0, 85 1637; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1638; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1639; CHECK-ZVE64X-NEXT: li a0, 51 1640; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1641; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1642; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1643; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1644; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1645; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1646; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1647; CHECK-ZVE64X-NEXT: ret 1648; 1649; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8: 1650; CHECK-D: # %bb.0: 1651; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 1652; CHECK-D-NEXT: vzext.vf4 v9, v8 1653; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9 1654; CHECK-D-NEXT: vsetvli zero, zero, e16, mf4, ta, mu 1655; CHECK-D-NEXT: vnsrl.wi v8, v8, 23 1656; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, mu 1657; CHECK-D-NEXT: vncvt.x.x.w v8, v8 1658; CHECK-D-NEXT: li a0, 134 1659; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1660; CHECK-D-NEXT: ret 1661 %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 
1 x i8> %va, i1 true) 1662 ret <vscale x 1 x i8> %a 1663} 1664 1665define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) { 1666; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i8: 1667; CHECK-ZVE64X: # %bb.0: 1668; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, mu 1669; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1670; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1671; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 1672; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1673; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1674; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1675; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1676; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1677; CHECK-ZVE64X-NEXT: li a0, 85 1678; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1679; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1680; CHECK-ZVE64X-NEXT: li a0, 51 1681; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1682; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1683; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1684; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1685; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1686; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1687; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1688; CHECK-ZVE64X-NEXT: ret 1689; 1690; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8: 1691; CHECK-D: # %bb.0: 1692; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu 1693; CHECK-D-NEXT: vzext.vf4 v9, v8 1694; CHECK-D-NEXT: vfcvt.f.xu.v v8, v9 1695; CHECK-D-NEXT: vsetvli zero, zero, e16, mf2, ta, mu 1696; CHECK-D-NEXT: vnsrl.wi v8, v8, 23 1697; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, mu 1698; CHECK-D-NEXT: vncvt.x.x.w v8, v8 1699; CHECK-D-NEXT: li a0, 134 1700; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1701; CHECK-D-NEXT: ret 1702 %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true) 1703 ret <vscale x 2 x i8> %a 1704} 1705 1706define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) { 1707; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i8: 1708; CHECK-ZVE64X: # %bb.0: 1709; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, mu 1710; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1711; 
CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1712; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 1713; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1714; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1715; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1716; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1717; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1718; CHECK-ZVE64X-NEXT: li a0, 85 1719; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1720; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1721; CHECK-ZVE64X-NEXT: li a0, 51 1722; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1723; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1724; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1725; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1726; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1727; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1728; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1729; CHECK-ZVE64X-NEXT: ret 1730; 1731; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8: 1732; CHECK-D: # %bb.0: 1733; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu 1734; CHECK-D-NEXT: vzext.vf4 v10, v8 1735; CHECK-D-NEXT: vfcvt.f.xu.v v8, v10 1736; CHECK-D-NEXT: vsetvli zero, zero, e16, m1, ta, mu 1737; CHECK-D-NEXT: vnsrl.wi v10, v8, 23 1738; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, mu 1739; CHECK-D-NEXT: vncvt.x.x.w v8, v10 1740; CHECK-D-NEXT: li a0, 134 1741; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1742; CHECK-D-NEXT: ret 1743 %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true) 1744 ret <vscale x 4 x i8> %a 1745} 1746 1747define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) { 1748; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i8: 1749; CHECK-ZVE64X: # %bb.0: 1750; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, mu 1751; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1752; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1753; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 2 1754; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1755; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1756; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v9 1757; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1758; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 1759; CHECK-ZVE64X-NEXT: li a0, 85 1760; 
CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 1761; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v9 1762; CHECK-ZVE64X-NEXT: li a0, 51 1763; CHECK-ZVE64X-NEXT: vand.vx v9, v8, a0 1764; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1765; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1766; CHECK-ZVE64X-NEXT: vadd.vv v8, v9, v8 1767; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 4 1768; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v9 1769; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1770; CHECK-ZVE64X-NEXT: ret 1771; 1772; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8: 1773; CHECK-D: # %bb.0: 1774; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu 1775; CHECK-D-NEXT: vzext.vf4 v12, v8 1776; CHECK-D-NEXT: vfcvt.f.xu.v v8, v12 1777; CHECK-D-NEXT: vsetvli zero, zero, e16, m2, ta, mu 1778; CHECK-D-NEXT: vnsrl.wi v12, v8, 23 1779; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, mu 1780; CHECK-D-NEXT: vncvt.x.x.w v8, v12 1781; CHECK-D-NEXT: li a0, 134 1782; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1783; CHECK-D-NEXT: ret 1784 %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true) 1785 ret <vscale x 8 x i8> %a 1786} 1787 1788define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) { 1789; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i8: 1790; CHECK-ZVE64X: # %bb.0: 1791; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, mu 1792; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 1793; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 1794; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 2 1795; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 1796; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 4 1797; CHECK-ZVE64X-NEXT: vor.vv v8, v8, v10 1798; CHECK-ZVE64X-NEXT: vnot.v v8, v8 1799; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 1800; CHECK-ZVE64X-NEXT: li a0, 85 1801; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 1802; CHECK-ZVE64X-NEXT: vsub.vv v8, v8, v10 1803; CHECK-ZVE64X-NEXT: li a0, 51 1804; CHECK-ZVE64X-NEXT: vand.vx v10, v8, a0 1805; CHECK-ZVE64X-NEXT: vsrl.vi v8, v8, 2 1806; CHECK-ZVE64X-NEXT: vand.vx v8, v8, a0 1807; CHECK-ZVE64X-NEXT: vadd.vv v8, v10, v8 1808; CHECK-ZVE64X-NEXT: 
vsrl.vi v10, v8, 4 1809; CHECK-ZVE64X-NEXT: vadd.vv v8, v8, v10 1810; CHECK-ZVE64X-NEXT: vand.vi v8, v8, 15 1811; CHECK-ZVE64X-NEXT: ret 1812; 1813; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8: 1814; CHECK-D: # %bb.0: 1815; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, mu 1816; CHECK-D-NEXT: vzext.vf4 v16, v8 1817; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16 1818; CHECK-D-NEXT: vsetvli zero, zero, e16, m4, ta, mu 1819; CHECK-D-NEXT: vnsrl.wi v16, v8, 23 1820; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, mu 1821; CHECK-D-NEXT: vncvt.x.x.w v8, v16 1822; CHECK-D-NEXT: li a0, 134 1823; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1824; CHECK-D-NEXT: ret 1825 %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true) 1826 ret <vscale x 16 x i8> %a 1827} 1828 1829define <vscale x 32 x i8> @ctlz_zero_undef_nxv32i8(<vscale x 32 x i8> %va) { 1830; CHECK-LABEL: ctlz_zero_undef_nxv32i8: 1831; CHECK: # %bb.0: 1832; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu 1833; CHECK-NEXT: vsrl.vi v12, v8, 1 1834; CHECK-NEXT: vor.vv v8, v8, v12 1835; CHECK-NEXT: vsrl.vi v12, v8, 2 1836; CHECK-NEXT: vor.vv v8, v8, v12 1837; CHECK-NEXT: vsrl.vi v12, v8, 4 1838; CHECK-NEXT: vor.vv v8, v8, v12 1839; CHECK-NEXT: vnot.v v8, v8 1840; CHECK-NEXT: vsrl.vi v12, v8, 1 1841; CHECK-NEXT: li a0, 85 1842; CHECK-NEXT: vand.vx v12, v12, a0 1843; CHECK-NEXT: vsub.vv v8, v8, v12 1844; CHECK-NEXT: li a0, 51 1845; CHECK-NEXT: vand.vx v12, v8, a0 1846; CHECK-NEXT: vsrl.vi v8, v8, 2 1847; CHECK-NEXT: vand.vx v8, v8, a0 1848; CHECK-NEXT: vadd.vv v8, v12, v8 1849; CHECK-NEXT: vsrl.vi v12, v8, 4 1850; CHECK-NEXT: vadd.vv v8, v8, v12 1851; CHECK-NEXT: vand.vi v8, v8, 15 1852; CHECK-NEXT: ret 1853 %a = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 true) 1854 ret <vscale x 32 x i8> %a 1855} 1856 1857define <vscale x 64 x i8> @ctlz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) { 1858; CHECK-LABEL: ctlz_zero_undef_nxv64i8: 1859; CHECK: # %bb.0: 1860; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu 1861; 
CHECK-NEXT: vsrl.vi v16, v8, 1 1862; CHECK-NEXT: vor.vv v8, v8, v16 1863; CHECK-NEXT: vsrl.vi v16, v8, 2 1864; CHECK-NEXT: vor.vv v8, v8, v16 1865; CHECK-NEXT: vsrl.vi v16, v8, 4 1866; CHECK-NEXT: vor.vv v8, v8, v16 1867; CHECK-NEXT: vnot.v v8, v8 1868; CHECK-NEXT: vsrl.vi v16, v8, 1 1869; CHECK-NEXT: li a0, 85 1870; CHECK-NEXT: vand.vx v16, v16, a0 1871; CHECK-NEXT: vsub.vv v8, v8, v16 1872; CHECK-NEXT: li a0, 51 1873; CHECK-NEXT: vand.vx v16, v8, a0 1874; CHECK-NEXT: vsrl.vi v8, v8, 2 1875; CHECK-NEXT: vand.vx v8, v8, a0 1876; CHECK-NEXT: vadd.vv v8, v16, v8 1877; CHECK-NEXT: vsrl.vi v16, v8, 4 1878; CHECK-NEXT: vadd.vv v8, v8, v16 1879; CHECK-NEXT: vand.vi v8, v8, 15 1880; CHECK-NEXT: ret 1881 %a = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 true) 1882 ret <vscale x 64 x i8> %a 1883} 1884 1885define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) { 1886; RV32I-LABEL: ctlz_zero_undef_nxv1i16: 1887; RV32I: # %bb.0: 1888; RV32I-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1889; RV32I-NEXT: vsrl.vi v9, v8, 1 1890; RV32I-NEXT: vor.vv v8, v8, v9 1891; RV32I-NEXT: vsrl.vi v9, v8, 2 1892; RV32I-NEXT: vor.vv v8, v8, v9 1893; RV32I-NEXT: vsrl.vi v9, v8, 4 1894; RV32I-NEXT: vor.vv v8, v8, v9 1895; RV32I-NEXT: vsrl.vi v9, v8, 8 1896; RV32I-NEXT: vor.vv v8, v8, v9 1897; RV32I-NEXT: vnot.v v8, v8 1898; RV32I-NEXT: vsrl.vi v9, v8, 1 1899; RV32I-NEXT: lui a0, 5 1900; RV32I-NEXT: addi a0, a0, 1365 1901; RV32I-NEXT: vand.vx v9, v9, a0 1902; RV32I-NEXT: vsub.vv v8, v8, v9 1903; RV32I-NEXT: lui a0, 3 1904; RV32I-NEXT: addi a0, a0, 819 1905; RV32I-NEXT: vand.vx v9, v8, a0 1906; RV32I-NEXT: vsrl.vi v8, v8, 2 1907; RV32I-NEXT: vand.vx v8, v8, a0 1908; RV32I-NEXT: vadd.vv v8, v9, v8 1909; RV32I-NEXT: vsrl.vi v9, v8, 4 1910; RV32I-NEXT: vadd.vv v8, v8, v9 1911; RV32I-NEXT: lui a0, 1 1912; RV32I-NEXT: addi a0, a0, -241 1913; RV32I-NEXT: vand.vx v8, v8, a0 1914; RV32I-NEXT: li a0, 257 1915; RV32I-NEXT: vmul.vx v8, v8, a0 1916; RV32I-NEXT: vsrl.vi 
v8, v8, 8 1917; RV32I-NEXT: ret 1918; 1919; RV64I-LABEL: ctlz_zero_undef_nxv1i16: 1920; RV64I: # %bb.0: 1921; RV64I-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1922; RV64I-NEXT: vsrl.vi v9, v8, 1 1923; RV64I-NEXT: vor.vv v8, v8, v9 1924; RV64I-NEXT: vsrl.vi v9, v8, 2 1925; RV64I-NEXT: vor.vv v8, v8, v9 1926; RV64I-NEXT: vsrl.vi v9, v8, 4 1927; RV64I-NEXT: vor.vv v8, v8, v9 1928; RV64I-NEXT: vsrl.vi v9, v8, 8 1929; RV64I-NEXT: vor.vv v8, v8, v9 1930; RV64I-NEXT: vnot.v v8, v8 1931; RV64I-NEXT: vsrl.vi v9, v8, 1 1932; RV64I-NEXT: lui a0, 5 1933; RV64I-NEXT: addiw a0, a0, 1365 1934; RV64I-NEXT: vand.vx v9, v9, a0 1935; RV64I-NEXT: vsub.vv v8, v8, v9 1936; RV64I-NEXT: lui a0, 3 1937; RV64I-NEXT: addiw a0, a0, 819 1938; RV64I-NEXT: vand.vx v9, v8, a0 1939; RV64I-NEXT: vsrl.vi v8, v8, 2 1940; RV64I-NEXT: vand.vx v8, v8, a0 1941; RV64I-NEXT: vadd.vv v8, v9, v8 1942; RV64I-NEXT: vsrl.vi v9, v8, 4 1943; RV64I-NEXT: vadd.vv v8, v8, v9 1944; RV64I-NEXT: lui a0, 1 1945; RV64I-NEXT: addiw a0, a0, -241 1946; RV64I-NEXT: vand.vx v8, v8, a0 1947; RV64I-NEXT: li a0, 257 1948; RV64I-NEXT: vmul.vx v8, v8, a0 1949; RV64I-NEXT: vsrl.vi v8, v8, 8 1950; RV64I-NEXT: ret 1951; 1952; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16: 1953; CHECK-D: # %bb.0: 1954; CHECK-D-NEXT: vsetvli a0, zero, e16, mf4, ta, mu 1955; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 1956; CHECK-D-NEXT: vnsrl.wi v8, v9, 23 1957; CHECK-D-NEXT: li a0, 142 1958; CHECK-D-NEXT: vrsub.vx v8, v8, a0 1959; CHECK-D-NEXT: ret 1960 %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true) 1961 ret <vscale x 1 x i16> %a 1962} 1963 1964define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) { 1965; RV32I-LABEL: ctlz_zero_undef_nxv2i16: 1966; RV32I: # %bb.0: 1967; RV32I-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 1968; RV32I-NEXT: vsrl.vi v9, v8, 1 1969; RV32I-NEXT: vor.vv v8, v8, v9 1970; RV32I-NEXT: vsrl.vi v9, v8, 2 1971; RV32I-NEXT: vor.vv v8, v8, v9 1972; RV32I-NEXT: vsrl.vi v9, v8, 4 1973; RV32I-NEXT: 
vor.vv v8, v8, v9 1974; RV32I-NEXT: vsrl.vi v9, v8, 8 1975; RV32I-NEXT: vor.vv v8, v8, v9 1976; RV32I-NEXT: vnot.v v8, v8 1977; RV32I-NEXT: vsrl.vi v9, v8, 1 1978; RV32I-NEXT: lui a0, 5 1979; RV32I-NEXT: addi a0, a0, 1365 1980; RV32I-NEXT: vand.vx v9, v9, a0 1981; RV32I-NEXT: vsub.vv v8, v8, v9 1982; RV32I-NEXT: lui a0, 3 1983; RV32I-NEXT: addi a0, a0, 819 1984; RV32I-NEXT: vand.vx v9, v8, a0 1985; RV32I-NEXT: vsrl.vi v8, v8, 2 1986; RV32I-NEXT: vand.vx v8, v8, a0 1987; RV32I-NEXT: vadd.vv v8, v9, v8 1988; RV32I-NEXT: vsrl.vi v9, v8, 4 1989; RV32I-NEXT: vadd.vv v8, v8, v9 1990; RV32I-NEXT: lui a0, 1 1991; RV32I-NEXT: addi a0, a0, -241 1992; RV32I-NEXT: vand.vx v8, v8, a0 1993; RV32I-NEXT: li a0, 257 1994; RV32I-NEXT: vmul.vx v8, v8, a0 1995; RV32I-NEXT: vsrl.vi v8, v8, 8 1996; RV32I-NEXT: ret 1997; 1998; RV64I-LABEL: ctlz_zero_undef_nxv2i16: 1999; RV64I: # %bb.0: 2000; RV64I-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 2001; RV64I-NEXT: vsrl.vi v9, v8, 1 2002; RV64I-NEXT: vor.vv v8, v8, v9 2003; RV64I-NEXT: vsrl.vi v9, v8, 2 2004; RV64I-NEXT: vor.vv v8, v8, v9 2005; RV64I-NEXT: vsrl.vi v9, v8, 4 2006; RV64I-NEXT: vor.vv v8, v8, v9 2007; RV64I-NEXT: vsrl.vi v9, v8, 8 2008; RV64I-NEXT: vor.vv v8, v8, v9 2009; RV64I-NEXT: vnot.v v8, v8 2010; RV64I-NEXT: vsrl.vi v9, v8, 1 2011; RV64I-NEXT: lui a0, 5 2012; RV64I-NEXT: addiw a0, a0, 1365 2013; RV64I-NEXT: vand.vx v9, v9, a0 2014; RV64I-NEXT: vsub.vv v8, v8, v9 2015; RV64I-NEXT: lui a0, 3 2016; RV64I-NEXT: addiw a0, a0, 819 2017; RV64I-NEXT: vand.vx v9, v8, a0 2018; RV64I-NEXT: vsrl.vi v8, v8, 2 2019; RV64I-NEXT: vand.vx v8, v8, a0 2020; RV64I-NEXT: vadd.vv v8, v9, v8 2021; RV64I-NEXT: vsrl.vi v9, v8, 4 2022; RV64I-NEXT: vadd.vv v8, v8, v9 2023; RV64I-NEXT: lui a0, 1 2024; RV64I-NEXT: addiw a0, a0, -241 2025; RV64I-NEXT: vand.vx v8, v8, a0 2026; RV64I-NEXT: li a0, 257 2027; RV64I-NEXT: vmul.vx v8, v8, a0 2028; RV64I-NEXT: vsrl.vi v8, v8, 8 2029; RV64I-NEXT: ret 2030; 2031; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16: 2032; 
CHECK-D: # %bb.0: 2033; CHECK-D-NEXT: vsetvli a0, zero, e16, mf2, ta, mu 2034; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 2035; CHECK-D-NEXT: vnsrl.wi v8, v9, 23 2036; CHECK-D-NEXT: li a0, 142 2037; CHECK-D-NEXT: vrsub.vx v8, v8, a0 2038; CHECK-D-NEXT: ret 2039 %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true) 2040 ret <vscale x 2 x i16> %a 2041} 2042 2043define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) { 2044; RV32I-LABEL: ctlz_zero_undef_nxv4i16: 2045; RV32I: # %bb.0: 2046; RV32I-NEXT: vsetvli a0, zero, e16, m1, ta, mu 2047; RV32I-NEXT: vsrl.vi v9, v8, 1 2048; RV32I-NEXT: vor.vv v8, v8, v9 2049; RV32I-NEXT: vsrl.vi v9, v8, 2 2050; RV32I-NEXT: vor.vv v8, v8, v9 2051; RV32I-NEXT: vsrl.vi v9, v8, 4 2052; RV32I-NEXT: vor.vv v8, v8, v9 2053; RV32I-NEXT: vsrl.vi v9, v8, 8 2054; RV32I-NEXT: vor.vv v8, v8, v9 2055; RV32I-NEXT: vnot.v v8, v8 2056; RV32I-NEXT: vsrl.vi v9, v8, 1 2057; RV32I-NEXT: lui a0, 5 2058; RV32I-NEXT: addi a0, a0, 1365 2059; RV32I-NEXT: vand.vx v9, v9, a0 2060; RV32I-NEXT: vsub.vv v8, v8, v9 2061; RV32I-NEXT: lui a0, 3 2062; RV32I-NEXT: addi a0, a0, 819 2063; RV32I-NEXT: vand.vx v9, v8, a0 2064; RV32I-NEXT: vsrl.vi v8, v8, 2 2065; RV32I-NEXT: vand.vx v8, v8, a0 2066; RV32I-NEXT: vadd.vv v8, v9, v8 2067; RV32I-NEXT: vsrl.vi v9, v8, 4 2068; RV32I-NEXT: vadd.vv v8, v8, v9 2069; RV32I-NEXT: lui a0, 1 2070; RV32I-NEXT: addi a0, a0, -241 2071; RV32I-NEXT: vand.vx v8, v8, a0 2072; RV32I-NEXT: li a0, 257 2073; RV32I-NEXT: vmul.vx v8, v8, a0 2074; RV32I-NEXT: vsrl.vi v8, v8, 8 2075; RV32I-NEXT: ret 2076; 2077; RV64I-LABEL: ctlz_zero_undef_nxv4i16: 2078; RV64I: # %bb.0: 2079; RV64I-NEXT: vsetvli a0, zero, e16, m1, ta, mu 2080; RV64I-NEXT: vsrl.vi v9, v8, 1 2081; RV64I-NEXT: vor.vv v8, v8, v9 2082; RV64I-NEXT: vsrl.vi v9, v8, 2 2083; RV64I-NEXT: vor.vv v8, v8, v9 2084; RV64I-NEXT: vsrl.vi v9, v8, 4 2085; RV64I-NEXT: vor.vv v8, v8, v9 2086; RV64I-NEXT: vsrl.vi v9, v8, 8 2087; RV64I-NEXT: vor.vv v8, v8, v9 2088; 
RV64I-NEXT: vnot.v v8, v8 2089; RV64I-NEXT: vsrl.vi v9, v8, 1 2090; RV64I-NEXT: lui a0, 5 2091; RV64I-NEXT: addiw a0, a0, 1365 2092; RV64I-NEXT: vand.vx v9, v9, a0 2093; RV64I-NEXT: vsub.vv v8, v8, v9 2094; RV64I-NEXT: lui a0, 3 2095; RV64I-NEXT: addiw a0, a0, 819 2096; RV64I-NEXT: vand.vx v9, v8, a0 2097; RV64I-NEXT: vsrl.vi v8, v8, 2 2098; RV64I-NEXT: vand.vx v8, v8, a0 2099; RV64I-NEXT: vadd.vv v8, v9, v8 2100; RV64I-NEXT: vsrl.vi v9, v8, 4 2101; RV64I-NEXT: vadd.vv v8, v8, v9 2102; RV64I-NEXT: lui a0, 1 2103; RV64I-NEXT: addiw a0, a0, -241 2104; RV64I-NEXT: vand.vx v8, v8, a0 2105; RV64I-NEXT: li a0, 257 2106; RV64I-NEXT: vmul.vx v8, v8, a0 2107; RV64I-NEXT: vsrl.vi v8, v8, 8 2108; RV64I-NEXT: ret 2109; 2110; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16: 2111; CHECK-D: # %bb.0: 2112; CHECK-D-NEXT: vsetvli a0, zero, e16, m1, ta, mu 2113; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8 2114; CHECK-D-NEXT: vnsrl.wi v8, v10, 23 2115; CHECK-D-NEXT: li a0, 142 2116; CHECK-D-NEXT: vrsub.vx v8, v8, a0 2117; CHECK-D-NEXT: ret 2118 %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true) 2119 ret <vscale x 4 x i16> %a 2120} 2121 2122define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) { 2123; RV32I-LABEL: ctlz_zero_undef_nxv8i16: 2124; RV32I: # %bb.0: 2125; RV32I-NEXT: vsetvli a0, zero, e16, m2, ta, mu 2126; RV32I-NEXT: vsrl.vi v10, v8, 1 2127; RV32I-NEXT: vor.vv v8, v8, v10 2128; RV32I-NEXT: vsrl.vi v10, v8, 2 2129; RV32I-NEXT: vor.vv v8, v8, v10 2130; RV32I-NEXT: vsrl.vi v10, v8, 4 2131; RV32I-NEXT: vor.vv v8, v8, v10 2132; RV32I-NEXT: vsrl.vi v10, v8, 8 2133; RV32I-NEXT: vor.vv v8, v8, v10 2134; RV32I-NEXT: vnot.v v8, v8 2135; RV32I-NEXT: vsrl.vi v10, v8, 1 2136; RV32I-NEXT: lui a0, 5 2137; RV32I-NEXT: addi a0, a0, 1365 2138; RV32I-NEXT: vand.vx v10, v10, a0 2139; RV32I-NEXT: vsub.vv v8, v8, v10 2140; RV32I-NEXT: lui a0, 3 2141; RV32I-NEXT: addi a0, a0, 819 2142; RV32I-NEXT: vand.vx v10, v8, a0 2143; RV32I-NEXT: vsrl.vi v8, v8, 2 2144; 
RV32I-NEXT: vand.vx v8, v8, a0 2145; RV32I-NEXT: vadd.vv v8, v10, v8 2146; RV32I-NEXT: vsrl.vi v10, v8, 4 2147; RV32I-NEXT: vadd.vv v8, v8, v10 2148; RV32I-NEXT: lui a0, 1 2149; RV32I-NEXT: addi a0, a0, -241 2150; RV32I-NEXT: vand.vx v8, v8, a0 2151; RV32I-NEXT: li a0, 257 2152; RV32I-NEXT: vmul.vx v8, v8, a0 2153; RV32I-NEXT: vsrl.vi v8, v8, 8 2154; RV32I-NEXT: ret 2155; 2156; RV64I-LABEL: ctlz_zero_undef_nxv8i16: 2157; RV64I: # %bb.0: 2158; RV64I-NEXT: vsetvli a0, zero, e16, m2, ta, mu 2159; RV64I-NEXT: vsrl.vi v10, v8, 1 2160; RV64I-NEXT: vor.vv v8, v8, v10 2161; RV64I-NEXT: vsrl.vi v10, v8, 2 2162; RV64I-NEXT: vor.vv v8, v8, v10 2163; RV64I-NEXT: vsrl.vi v10, v8, 4 2164; RV64I-NEXT: vor.vv v8, v8, v10 2165; RV64I-NEXT: vsrl.vi v10, v8, 8 2166; RV64I-NEXT: vor.vv v8, v8, v10 2167; RV64I-NEXT: vnot.v v8, v8 2168; RV64I-NEXT: vsrl.vi v10, v8, 1 2169; RV64I-NEXT: lui a0, 5 2170; RV64I-NEXT: addiw a0, a0, 1365 2171; RV64I-NEXT: vand.vx v10, v10, a0 2172; RV64I-NEXT: vsub.vv v8, v8, v10 2173; RV64I-NEXT: lui a0, 3 2174; RV64I-NEXT: addiw a0, a0, 819 2175; RV64I-NEXT: vand.vx v10, v8, a0 2176; RV64I-NEXT: vsrl.vi v8, v8, 2 2177; RV64I-NEXT: vand.vx v8, v8, a0 2178; RV64I-NEXT: vadd.vv v8, v10, v8 2179; RV64I-NEXT: vsrl.vi v10, v8, 4 2180; RV64I-NEXT: vadd.vv v8, v8, v10 2181; RV64I-NEXT: lui a0, 1 2182; RV64I-NEXT: addiw a0, a0, -241 2183; RV64I-NEXT: vand.vx v8, v8, a0 2184; RV64I-NEXT: li a0, 257 2185; RV64I-NEXT: vmul.vx v8, v8, a0 2186; RV64I-NEXT: vsrl.vi v8, v8, 8 2187; RV64I-NEXT: ret 2188; 2189; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16: 2190; CHECK-D: # %bb.0: 2191; CHECK-D-NEXT: vsetvli a0, zero, e16, m2, ta, mu 2192; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8 2193; CHECK-D-NEXT: vnsrl.wi v8, v12, 23 2194; CHECK-D-NEXT: li a0, 142 2195; CHECK-D-NEXT: vrsub.vx v8, v8, a0 2196; CHECK-D-NEXT: ret 2197 %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true) 2198 ret <vscale x 8 x i16> %a 2199} 2200 2201define <vscale x 16 x i16> 
@ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) { 2202; RV32I-LABEL: ctlz_zero_undef_nxv16i16: 2203; RV32I: # %bb.0: 2204; RV32I-NEXT: vsetvli a0, zero, e16, m4, ta, mu 2205; RV32I-NEXT: vsrl.vi v12, v8, 1 2206; RV32I-NEXT: vor.vv v8, v8, v12 2207; RV32I-NEXT: vsrl.vi v12, v8, 2 2208; RV32I-NEXT: vor.vv v8, v8, v12 2209; RV32I-NEXT: vsrl.vi v12, v8, 4 2210; RV32I-NEXT: vor.vv v8, v8, v12 2211; RV32I-NEXT: vsrl.vi v12, v8, 8 2212; RV32I-NEXT: vor.vv v8, v8, v12 2213; RV32I-NEXT: vnot.v v8, v8 2214; RV32I-NEXT: vsrl.vi v12, v8, 1 2215; RV32I-NEXT: lui a0, 5 2216; RV32I-NEXT: addi a0, a0, 1365 2217; RV32I-NEXT: vand.vx v12, v12, a0 2218; RV32I-NEXT: vsub.vv v8, v8, v12 2219; RV32I-NEXT: lui a0, 3 2220; RV32I-NEXT: addi a0, a0, 819 2221; RV32I-NEXT: vand.vx v12, v8, a0 2222; RV32I-NEXT: vsrl.vi v8, v8, 2 2223; RV32I-NEXT: vand.vx v8, v8, a0 2224; RV32I-NEXT: vadd.vv v8, v12, v8 2225; RV32I-NEXT: vsrl.vi v12, v8, 4 2226; RV32I-NEXT: vadd.vv v8, v8, v12 2227; RV32I-NEXT: lui a0, 1 2228; RV32I-NEXT: addi a0, a0, -241 2229; RV32I-NEXT: vand.vx v8, v8, a0 2230; RV32I-NEXT: li a0, 257 2231; RV32I-NEXT: vmul.vx v8, v8, a0 2232; RV32I-NEXT: vsrl.vi v8, v8, 8 2233; RV32I-NEXT: ret 2234; 2235; RV64I-LABEL: ctlz_zero_undef_nxv16i16: 2236; RV64I: # %bb.0: 2237; RV64I-NEXT: vsetvli a0, zero, e16, m4, ta, mu 2238; RV64I-NEXT: vsrl.vi v12, v8, 1 2239; RV64I-NEXT: vor.vv v8, v8, v12 2240; RV64I-NEXT: vsrl.vi v12, v8, 2 2241; RV64I-NEXT: vor.vv v8, v8, v12 2242; RV64I-NEXT: vsrl.vi v12, v8, 4 2243; RV64I-NEXT: vor.vv v8, v8, v12 2244; RV64I-NEXT: vsrl.vi v12, v8, 8 2245; RV64I-NEXT: vor.vv v8, v8, v12 2246; RV64I-NEXT: vnot.v v8, v8 2247; RV64I-NEXT: vsrl.vi v12, v8, 1 2248; RV64I-NEXT: lui a0, 5 2249; RV64I-NEXT: addiw a0, a0, 1365 2250; RV64I-NEXT: vand.vx v12, v12, a0 2251; RV64I-NEXT: vsub.vv v8, v8, v12 2252; RV64I-NEXT: lui a0, 3 2253; RV64I-NEXT: addiw a0, a0, 819 2254; RV64I-NEXT: vand.vx v12, v8, a0 2255; RV64I-NEXT: vsrl.vi v8, v8, 2 2256; RV64I-NEXT: vand.vx v8, v8, a0 
2257; RV64I-NEXT: vadd.vv v8, v12, v8 2258; RV64I-NEXT: vsrl.vi v12, v8, 4 2259; RV64I-NEXT: vadd.vv v8, v8, v12 2260; RV64I-NEXT: lui a0, 1 2261; RV64I-NEXT: addiw a0, a0, -241 2262; RV64I-NEXT: vand.vx v8, v8, a0 2263; RV64I-NEXT: li a0, 257 2264; RV64I-NEXT: vmul.vx v8, v8, a0 2265; RV64I-NEXT: vsrl.vi v8, v8, 8 2266; RV64I-NEXT: ret 2267; 2268; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16: 2269; CHECK-D: # %bb.0: 2270; CHECK-D-NEXT: vsetvli a0, zero, e16, m4, ta, mu 2271; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 2272; CHECK-D-NEXT: vnsrl.wi v8, v16, 23 2273; CHECK-D-NEXT: li a0, 142 2274; CHECK-D-NEXT: vrsub.vx v8, v8, a0 2275; CHECK-D-NEXT: ret 2276 %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true) 2277 ret <vscale x 16 x i16> %a 2278} 2279 2280define <vscale x 32 x i16> @ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) { 2281; RV32-LABEL: ctlz_zero_undef_nxv32i16: 2282; RV32: # %bb.0: 2283; RV32-NEXT: vsetvli a0, zero, e16, m8, ta, mu 2284; RV32-NEXT: vsrl.vi v16, v8, 1 2285; RV32-NEXT: vor.vv v8, v8, v16 2286; RV32-NEXT: vsrl.vi v16, v8, 2 2287; RV32-NEXT: vor.vv v8, v8, v16 2288; RV32-NEXT: vsrl.vi v16, v8, 4 2289; RV32-NEXT: vor.vv v8, v8, v16 2290; RV32-NEXT: vsrl.vi v16, v8, 8 2291; RV32-NEXT: vor.vv v8, v8, v16 2292; RV32-NEXT: vnot.v v8, v8 2293; RV32-NEXT: vsrl.vi v16, v8, 1 2294; RV32-NEXT: lui a0, 5 2295; RV32-NEXT: addi a0, a0, 1365 2296; RV32-NEXT: vand.vx v16, v16, a0 2297; RV32-NEXT: vsub.vv v8, v8, v16 2298; RV32-NEXT: lui a0, 3 2299; RV32-NEXT: addi a0, a0, 819 2300; RV32-NEXT: vand.vx v16, v8, a0 2301; RV32-NEXT: vsrl.vi v8, v8, 2 2302; RV32-NEXT: vand.vx v8, v8, a0 2303; RV32-NEXT: vadd.vv v8, v16, v8 2304; RV32-NEXT: vsrl.vi v16, v8, 4 2305; RV32-NEXT: vadd.vv v8, v8, v16 2306; RV32-NEXT: lui a0, 1 2307; RV32-NEXT: addi a0, a0, -241 2308; RV32-NEXT: vand.vx v8, v8, a0 2309; RV32-NEXT: li a0, 257 2310; RV32-NEXT: vmul.vx v8, v8, a0 2311; RV32-NEXT: vsrl.vi v8, v8, 8 2312; RV32-NEXT: ret 2313; 2314; RV64-LABEL: 
ctlz_zero_undef_nxv32i16: 2315; RV64: # %bb.0: 2316; RV64-NEXT: vsetvli a0, zero, e16, m8, ta, mu 2317; RV64-NEXT: vsrl.vi v16, v8, 1 2318; RV64-NEXT: vor.vv v8, v8, v16 2319; RV64-NEXT: vsrl.vi v16, v8, 2 2320; RV64-NEXT: vor.vv v8, v8, v16 2321; RV64-NEXT: vsrl.vi v16, v8, 4 2322; RV64-NEXT: vor.vv v8, v8, v16 2323; RV64-NEXT: vsrl.vi v16, v8, 8 2324; RV64-NEXT: vor.vv v8, v8, v16 2325; RV64-NEXT: vnot.v v8, v8 2326; RV64-NEXT: vsrl.vi v16, v8, 1 2327; RV64-NEXT: lui a0, 5 2328; RV64-NEXT: addiw a0, a0, 1365 2329; RV64-NEXT: vand.vx v16, v16, a0 2330; RV64-NEXT: vsub.vv v8, v8, v16 2331; RV64-NEXT: lui a0, 3 2332; RV64-NEXT: addiw a0, a0, 819 2333; RV64-NEXT: vand.vx v16, v8, a0 2334; RV64-NEXT: vsrl.vi v8, v8, 2 2335; RV64-NEXT: vand.vx v8, v8, a0 2336; RV64-NEXT: vadd.vv v8, v16, v8 2337; RV64-NEXT: vsrl.vi v16, v8, 4 2338; RV64-NEXT: vadd.vv v8, v8, v16 2339; RV64-NEXT: lui a0, 1 2340; RV64-NEXT: addiw a0, a0, -241 2341; RV64-NEXT: vand.vx v8, v8, a0 2342; RV64-NEXT: li a0, 257 2343; RV64-NEXT: vmul.vx v8, v8, a0 2344; RV64-NEXT: vsrl.vi v8, v8, 8 2345; RV64-NEXT: ret 2346 %a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true) 2347 ret <vscale x 32 x i16> %a 2348} 2349 2350define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) { 2351; RV32I-LABEL: ctlz_zero_undef_nxv1i32: 2352; RV32I: # %bb.0: 2353; RV32I-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 2354; RV32I-NEXT: vsrl.vi v9, v8, 1 2355; RV32I-NEXT: vor.vv v8, v8, v9 2356; RV32I-NEXT: vsrl.vi v9, v8, 2 2357; RV32I-NEXT: vor.vv v8, v8, v9 2358; RV32I-NEXT: vsrl.vi v9, v8, 4 2359; RV32I-NEXT: vor.vv v8, v8, v9 2360; RV32I-NEXT: vsrl.vi v9, v8, 8 2361; RV32I-NEXT: vor.vv v8, v8, v9 2362; RV32I-NEXT: vsrl.vi v9, v8, 16 2363; RV32I-NEXT: vor.vv v8, v8, v9 2364; RV32I-NEXT: vnot.v v8, v8 2365; RV32I-NEXT: vsrl.vi v9, v8, 1 2366; RV32I-NEXT: lui a0, 349525 2367; RV32I-NEXT: addi a0, a0, 1365 2368; RV32I-NEXT: vand.vx v9, v9, a0 2369; RV32I-NEXT: vsub.vv v8, v8, v9 2370; 
RV32I-NEXT: lui a0, 209715 2371; RV32I-NEXT: addi a0, a0, 819 2372; RV32I-NEXT: vand.vx v9, v8, a0 2373; RV32I-NEXT: vsrl.vi v8, v8, 2 2374; RV32I-NEXT: vand.vx v8, v8, a0 2375; RV32I-NEXT: vadd.vv v8, v9, v8 2376; RV32I-NEXT: vsrl.vi v9, v8, 4 2377; RV32I-NEXT: vadd.vv v8, v8, v9 2378; RV32I-NEXT: lui a0, 61681 2379; RV32I-NEXT: addi a0, a0, -241 2380; RV32I-NEXT: vand.vx v8, v8, a0 2381; RV32I-NEXT: lui a0, 4112 2382; RV32I-NEXT: addi a0, a0, 257 2383; RV32I-NEXT: vmul.vx v8, v8, a0 2384; RV32I-NEXT: vsrl.vi v8, v8, 24 2385; RV32I-NEXT: ret 2386; 2387; RV64I-LABEL: ctlz_zero_undef_nxv1i32: 2388; RV64I: # %bb.0: 2389; RV64I-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 2390; RV64I-NEXT: vsrl.vi v9, v8, 1 2391; RV64I-NEXT: vor.vv v8, v8, v9 2392; RV64I-NEXT: vsrl.vi v9, v8, 2 2393; RV64I-NEXT: vor.vv v8, v8, v9 2394; RV64I-NEXT: vsrl.vi v9, v8, 4 2395; RV64I-NEXT: vor.vv v8, v8, v9 2396; RV64I-NEXT: vsrl.vi v9, v8, 8 2397; RV64I-NEXT: vor.vv v8, v8, v9 2398; RV64I-NEXT: vsrl.vi v9, v8, 16 2399; RV64I-NEXT: vor.vv v8, v8, v9 2400; RV64I-NEXT: vnot.v v8, v8 2401; RV64I-NEXT: vsrl.vi v9, v8, 1 2402; RV64I-NEXT: lui a0, 349525 2403; RV64I-NEXT: addiw a0, a0, 1365 2404; RV64I-NEXT: vand.vx v9, v9, a0 2405; RV64I-NEXT: vsub.vv v8, v8, v9 2406; RV64I-NEXT: lui a0, 209715 2407; RV64I-NEXT: addiw a0, a0, 819 2408; RV64I-NEXT: vand.vx v9, v8, a0 2409; RV64I-NEXT: vsrl.vi v8, v8, 2 2410; RV64I-NEXT: vand.vx v8, v8, a0 2411; RV64I-NEXT: vadd.vv v8, v9, v8 2412; RV64I-NEXT: vsrl.vi v9, v8, 4 2413; RV64I-NEXT: vadd.vv v8, v8, v9 2414; RV64I-NEXT: lui a0, 61681 2415; RV64I-NEXT: addiw a0, a0, -241 2416; RV64I-NEXT: vand.vx v8, v8, a0 2417; RV64I-NEXT: lui a0, 4112 2418; RV64I-NEXT: addiw a0, a0, 257 2419; RV64I-NEXT: vmul.vx v8, v8, a0 2420; RV64I-NEXT: vsrl.vi v8, v8, 24 2421; RV64I-NEXT: ret 2422; 2423; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32: 2424; CHECK-D: # %bb.0: 2425; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, mu 2426; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8 2427; CHECK-D-NEXT: li 
a0, 52 2428; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, mu 2429; CHECK-D-NEXT: vsrl.vx v8, v9, a0 2430; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, mu 2431; CHECK-D-NEXT: vncvt.x.x.w v8, v8 2432; CHECK-D-NEXT: li a0, 1054 2433; CHECK-D-NEXT: vrsub.vx v8, v8, a0 2434; CHECK-D-NEXT: ret 2435 %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true) 2436 ret <vscale x 1 x i32> %a 2437} 2438 2439define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) { 2440; RV32I-LABEL: ctlz_zero_undef_nxv2i32: 2441; RV32I: # %bb.0: 2442; RV32I-NEXT: vsetvli a0, zero, e32, m1, ta, mu 2443; RV32I-NEXT: vsrl.vi v9, v8, 1 2444; RV32I-NEXT: vor.vv v8, v8, v9 2445; RV32I-NEXT: vsrl.vi v9, v8, 2 2446; RV32I-NEXT: vor.vv v8, v8, v9 2447; RV32I-NEXT: vsrl.vi v9, v8, 4 2448; RV32I-NEXT: vor.vv v8, v8, v9 2449; RV32I-NEXT: vsrl.vi v9, v8, 8 2450; RV32I-NEXT: vor.vv v8, v8, v9 2451; RV32I-NEXT: vsrl.vi v9, v8, 16 2452; RV32I-NEXT: vor.vv v8, v8, v9 2453; RV32I-NEXT: vnot.v v8, v8 2454; RV32I-NEXT: vsrl.vi v9, v8, 1 2455; RV32I-NEXT: lui a0, 349525 2456; RV32I-NEXT: addi a0, a0, 1365 2457; RV32I-NEXT: vand.vx v9, v9, a0 2458; RV32I-NEXT: vsub.vv v8, v8, v9 2459; RV32I-NEXT: lui a0, 209715 2460; RV32I-NEXT: addi a0, a0, 819 2461; RV32I-NEXT: vand.vx v9, v8, a0 2462; RV32I-NEXT: vsrl.vi v8, v8, 2 2463; RV32I-NEXT: vand.vx v8, v8, a0 2464; RV32I-NEXT: vadd.vv v8, v9, v8 2465; RV32I-NEXT: vsrl.vi v9, v8, 4 2466; RV32I-NEXT: vadd.vv v8, v8, v9 2467; RV32I-NEXT: lui a0, 61681 2468; RV32I-NEXT: addi a0, a0, -241 2469; RV32I-NEXT: vand.vx v8, v8, a0 2470; RV32I-NEXT: lui a0, 4112 2471; RV32I-NEXT: addi a0, a0, 257 2472; RV32I-NEXT: vmul.vx v8, v8, a0 2473; RV32I-NEXT: vsrl.vi v8, v8, 24 2474; RV32I-NEXT: ret 2475; 2476; RV64I-LABEL: ctlz_zero_undef_nxv2i32: 2477; RV64I: # %bb.0: 2478; RV64I-NEXT: vsetvli a0, zero, e32, m1, ta, mu 2479; RV64I-NEXT: vsrl.vi v9, v8, 1 2480; RV64I-NEXT: vor.vv v8, v8, v9 2481; RV64I-NEXT: vsrl.vi v9, v8, 2 2482; RV64I-NEXT: 
vor.vv v8, v8, v9 2483; RV64I-NEXT: vsrl.vi v9, v8, 4 2484; RV64I-NEXT: vor.vv v8, v8, v9 2485; RV64I-NEXT: vsrl.vi v9, v8, 8 2486; RV64I-NEXT: vor.vv v8, v8, v9 2487; RV64I-NEXT: vsrl.vi v9, v8, 16 2488; RV64I-NEXT: vor.vv v8, v8, v9 2489; RV64I-NEXT: vnot.v v8, v8 2490; RV64I-NEXT: vsrl.vi v9, v8, 1 2491; RV64I-NEXT: lui a0, 349525 2492; RV64I-NEXT: addiw a0, a0, 1365 2493; RV64I-NEXT: vand.vx v9, v9, a0 2494; RV64I-NEXT: vsub.vv v8, v8, v9 2495; RV64I-NEXT: lui a0, 209715 2496; RV64I-NEXT: addiw a0, a0, 819 2497; RV64I-NEXT: vand.vx v9, v8, a0 2498; RV64I-NEXT: vsrl.vi v8, v8, 2 2499; RV64I-NEXT: vand.vx v8, v8, a0 2500; RV64I-NEXT: vadd.vv v8, v9, v8 2501; RV64I-NEXT: vsrl.vi v9, v8, 4 2502; RV64I-NEXT: vadd.vv v8, v8, v9 2503; RV64I-NEXT: lui a0, 61681 2504; RV64I-NEXT: addiw a0, a0, -241 2505; RV64I-NEXT: vand.vx v8, v8, a0 2506; RV64I-NEXT: lui a0, 4112 2507; RV64I-NEXT: addiw a0, a0, 257 2508; RV64I-NEXT: vmul.vx v8, v8, a0 2509; RV64I-NEXT: vsrl.vi v8, v8, 24 2510; RV64I-NEXT: ret 2511; 2512; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32: 2513; CHECK-D: # %bb.0: 2514; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, mu 2515; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8 2516; CHECK-D-NEXT: li a0, 52 2517; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, mu 2518; CHECK-D-NEXT: vsrl.vx v8, v10, a0 2519; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, mu 2520; CHECK-D-NEXT: vncvt.x.x.w v10, v8 2521; CHECK-D-NEXT: li a0, 1054 2522; CHECK-D-NEXT: vrsub.vx v8, v10, a0 2523; CHECK-D-NEXT: ret 2524 %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true) 2525 ret <vscale x 2 x i32> %a 2526} 2527 2528define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) { 2529; RV32I-LABEL: ctlz_zero_undef_nxv4i32: 2530; RV32I: # %bb.0: 2531; RV32I-NEXT: vsetvli a0, zero, e32, m2, ta, mu 2532; RV32I-NEXT: vsrl.vi v10, v8, 1 2533; RV32I-NEXT: vor.vv v8, v8, v10 2534; RV32I-NEXT: vsrl.vi v10, v8, 2 2535; RV32I-NEXT: vor.vv v8, v8, v10 2536; RV32I-NEXT: vsrl.vi 
v10, v8, 4 2537; RV32I-NEXT: vor.vv v8, v8, v10 2538; RV32I-NEXT: vsrl.vi v10, v8, 8 2539; RV32I-NEXT: vor.vv v8, v8, v10 2540; RV32I-NEXT: vsrl.vi v10, v8, 16 2541; RV32I-NEXT: vor.vv v8, v8, v10 2542; RV32I-NEXT: vnot.v v8, v8 2543; RV32I-NEXT: vsrl.vi v10, v8, 1 2544; RV32I-NEXT: lui a0, 349525 2545; RV32I-NEXT: addi a0, a0, 1365 2546; RV32I-NEXT: vand.vx v10, v10, a0 2547; RV32I-NEXT: vsub.vv v8, v8, v10 2548; RV32I-NEXT: lui a0, 209715 2549; RV32I-NEXT: addi a0, a0, 819 2550; RV32I-NEXT: vand.vx v10, v8, a0 2551; RV32I-NEXT: vsrl.vi v8, v8, 2 2552; RV32I-NEXT: vand.vx v8, v8, a0 2553; RV32I-NEXT: vadd.vv v8, v10, v8 2554; RV32I-NEXT: vsrl.vi v10, v8, 4 2555; RV32I-NEXT: vadd.vv v8, v8, v10 2556; RV32I-NEXT: lui a0, 61681 2557; RV32I-NEXT: addi a0, a0, -241 2558; RV32I-NEXT: vand.vx v8, v8, a0 2559; RV32I-NEXT: lui a0, 4112 2560; RV32I-NEXT: addi a0, a0, 257 2561; RV32I-NEXT: vmul.vx v8, v8, a0 2562; RV32I-NEXT: vsrl.vi v8, v8, 24 2563; RV32I-NEXT: ret 2564; 2565; RV64I-LABEL: ctlz_zero_undef_nxv4i32: 2566; RV64I: # %bb.0: 2567; RV64I-NEXT: vsetvli a0, zero, e32, m2, ta, mu 2568; RV64I-NEXT: vsrl.vi v10, v8, 1 2569; RV64I-NEXT: vor.vv v8, v8, v10 2570; RV64I-NEXT: vsrl.vi v10, v8, 2 2571; RV64I-NEXT: vor.vv v8, v8, v10 2572; RV64I-NEXT: vsrl.vi v10, v8, 4 2573; RV64I-NEXT: vor.vv v8, v8, v10 2574; RV64I-NEXT: vsrl.vi v10, v8, 8 2575; RV64I-NEXT: vor.vv v8, v8, v10 2576; RV64I-NEXT: vsrl.vi v10, v8, 16 2577; RV64I-NEXT: vor.vv v8, v8, v10 2578; RV64I-NEXT: vnot.v v8, v8 2579; RV64I-NEXT: vsrl.vi v10, v8, 1 2580; RV64I-NEXT: lui a0, 349525 2581; RV64I-NEXT: addiw a0, a0, 1365 2582; RV64I-NEXT: vand.vx v10, v10, a0 2583; RV64I-NEXT: vsub.vv v8, v8, v10 2584; RV64I-NEXT: lui a0, 209715 2585; RV64I-NEXT: addiw a0, a0, 819 2586; RV64I-NEXT: vand.vx v10, v8, a0 2587; RV64I-NEXT: vsrl.vi v8, v8, 2 2588; RV64I-NEXT: vand.vx v8, v8, a0 2589; RV64I-NEXT: vadd.vv v8, v10, v8 2590; RV64I-NEXT: vsrl.vi v10, v8, 4 2591; RV64I-NEXT: vadd.vv v8, v8, v10 2592; RV64I-NEXT: lui 
a0, 61681 2593; RV64I-NEXT: addiw a0, a0, -241 2594; RV64I-NEXT: vand.vx v8, v8, a0 2595; RV64I-NEXT: lui a0, 4112 2596; RV64I-NEXT: addiw a0, a0, 257 2597; RV64I-NEXT: vmul.vx v8, v8, a0 2598; RV64I-NEXT: vsrl.vi v8, v8, 24 2599; RV64I-NEXT: ret 2600; 2601; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32: 2602; CHECK-D: # %bb.0: 2603; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, mu 2604; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8 2605; CHECK-D-NEXT: li a0, 52 2606; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, mu 2607; CHECK-D-NEXT: vsrl.vx v8, v12, a0 2608; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, mu 2609; CHECK-D-NEXT: vncvt.x.x.w v12, v8 2610; CHECK-D-NEXT: li a0, 1054 2611; CHECK-D-NEXT: vrsub.vx v8, v12, a0 2612; CHECK-D-NEXT: ret 2613 %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true) 2614 ret <vscale x 4 x i32> %a 2615} 2616 2617define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) { 2618; RV32I-LABEL: ctlz_zero_undef_nxv8i32: 2619; RV32I: # %bb.0: 2620; RV32I-NEXT: vsetvli a0, zero, e32, m4, ta, mu 2621; RV32I-NEXT: vsrl.vi v12, v8, 1 2622; RV32I-NEXT: vor.vv v8, v8, v12 2623; RV32I-NEXT: vsrl.vi v12, v8, 2 2624; RV32I-NEXT: vor.vv v8, v8, v12 2625; RV32I-NEXT: vsrl.vi v12, v8, 4 2626; RV32I-NEXT: vor.vv v8, v8, v12 2627; RV32I-NEXT: vsrl.vi v12, v8, 8 2628; RV32I-NEXT: vor.vv v8, v8, v12 2629; RV32I-NEXT: vsrl.vi v12, v8, 16 2630; RV32I-NEXT: vor.vv v8, v8, v12 2631; RV32I-NEXT: vnot.v v8, v8 2632; RV32I-NEXT: vsrl.vi v12, v8, 1 2633; RV32I-NEXT: lui a0, 349525 2634; RV32I-NEXT: addi a0, a0, 1365 2635; RV32I-NEXT: vand.vx v12, v12, a0 2636; RV32I-NEXT: vsub.vv v8, v8, v12 2637; RV32I-NEXT: lui a0, 209715 2638; RV32I-NEXT: addi a0, a0, 819 2639; RV32I-NEXT: vand.vx v12, v8, a0 2640; RV32I-NEXT: vsrl.vi v8, v8, 2 2641; RV32I-NEXT: vand.vx v8, v8, a0 2642; RV32I-NEXT: vadd.vv v8, v12, v8 2643; RV32I-NEXT: vsrl.vi v12, v8, 4 2644; RV32I-NEXT: vadd.vv v8, v8, v12 2645; RV32I-NEXT: lui a0, 61681 2646; RV32I-NEXT: addi 
a0, a0, -241 2647; RV32I-NEXT: vand.vx v8, v8, a0 2648; RV32I-NEXT: lui a0, 4112 2649; RV32I-NEXT: addi a0, a0, 257 2650; RV32I-NEXT: vmul.vx v8, v8, a0 2651; RV32I-NEXT: vsrl.vi v8, v8, 24 2652; RV32I-NEXT: ret 2653; 2654; RV64I-LABEL: ctlz_zero_undef_nxv8i32: 2655; RV64I: # %bb.0: 2656; RV64I-NEXT: vsetvli a0, zero, e32, m4, ta, mu 2657; RV64I-NEXT: vsrl.vi v12, v8, 1 2658; RV64I-NEXT: vor.vv v8, v8, v12 2659; RV64I-NEXT: vsrl.vi v12, v8, 2 2660; RV64I-NEXT: vor.vv v8, v8, v12 2661; RV64I-NEXT: vsrl.vi v12, v8, 4 2662; RV64I-NEXT: vor.vv v8, v8, v12 2663; RV64I-NEXT: vsrl.vi v12, v8, 8 2664; RV64I-NEXT: vor.vv v8, v8, v12 2665; RV64I-NEXT: vsrl.vi v12, v8, 16 2666; RV64I-NEXT: vor.vv v8, v8, v12 2667; RV64I-NEXT: vnot.v v8, v8 2668; RV64I-NEXT: vsrl.vi v12, v8, 1 2669; RV64I-NEXT: lui a0, 349525 2670; RV64I-NEXT: addiw a0, a0, 1365 2671; RV64I-NEXT: vand.vx v12, v12, a0 2672; RV64I-NEXT: vsub.vv v8, v8, v12 2673; RV64I-NEXT: lui a0, 209715 2674; RV64I-NEXT: addiw a0, a0, 819 2675; RV64I-NEXT: vand.vx v12, v8, a0 2676; RV64I-NEXT: vsrl.vi v8, v8, 2 2677; RV64I-NEXT: vand.vx v8, v8, a0 2678; RV64I-NEXT: vadd.vv v8, v12, v8 2679; RV64I-NEXT: vsrl.vi v12, v8, 4 2680; RV64I-NEXT: vadd.vv v8, v8, v12 2681; RV64I-NEXT: lui a0, 61681 2682; RV64I-NEXT: addiw a0, a0, -241 2683; RV64I-NEXT: vand.vx v8, v8, a0 2684; RV64I-NEXT: lui a0, 4112 2685; RV64I-NEXT: addiw a0, a0, 257 2686; RV64I-NEXT: vmul.vx v8, v8, a0 2687; RV64I-NEXT: vsrl.vi v8, v8, 24 2688; RV64I-NEXT: ret 2689; 2690; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32: 2691; CHECK-D: # %bb.0: 2692; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, mu 2693; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8 2694; CHECK-D-NEXT: li a0, 52 2695; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, mu 2696; CHECK-D-NEXT: vsrl.vx v8, v16, a0 2697; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, mu 2698; CHECK-D-NEXT: vncvt.x.x.w v16, v8 2699; CHECK-D-NEXT: li a0, 1054 2700; CHECK-D-NEXT: vrsub.vx v8, v16, a0 2701; CHECK-D-NEXT: ret 2702 %a = call <vscale 
x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
  ret <vscale x 8 x i32> %a
}

; The functions below call llvm.ctlz with the i1 operand set to true on
; scalable i32/i64 vectors. Their expected assembly is autogenerated (see the
; NOTE on the first line of this file); regenerate it with that script rather
; than editing the check lines by hand.

; llvm.ctlz on <vscale x 16 x i32>, i1 operand true.
define <vscale x 16 x i32> @ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 true)
  ret <vscale x 16 x i32> %a
}

; llvm.ctlz on <vscale x 1 x i64>, i1 operand true.
; In the riscv32 checks the 64-bit constants are stored as 32-bit halves to a
; stack slot and read back with vlse64.v; in the riscv64 checks they are loaded
; from .LCPI* constant-pool symbols.
define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 1
; RV32-NEXT:    vand.vv v9, v11, v9
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsrl.vi v11, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v11
; RV32-NEXT:    vand.vv v8, v8, v9
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v9, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI40_0)
; RV64-NEXT:    ld a0, %lo(.LCPI40_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI40_1)
; RV64-NEXT:    ld a1, %lo(.LCPI40_1)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    lui a0, %hi(.LCPI40_2)
; RV64-NEXT:    ld a0, %lo(.LCPI40_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI40_3)
; RV64-NEXT:    ld a1, %lo(.LCPI40_3)(a1)
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 true)
  ret <vscale x 1 x i64> %a
}

; llvm.ctlz on <vscale x 2 x i64>, i1 operand true.
define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 1
; RV32-NEXT:    vand.vv v10, v14, v10
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v14
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI41_0)
; RV64-NEXT:    ld a0, %lo(.LCPI41_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_1)
; RV64-NEXT:    ld a1, %lo(.LCPI41_1)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    lui a0, %hi(.LCPI41_2)
; RV64-NEXT:    ld a0, %lo(.LCPI41_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_3)
; RV64-NEXT:    ld a1, %lo(.LCPI41_3)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 true)
  ret <vscale x 2 x i64> %a
}

; llvm.ctlz on <vscale x 4 x i64>, i1 operand true.
define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 1
; RV32-NEXT:    vand.vv v12, v20, v12
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v20
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v16
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI42_0)
; RV64-NEXT:    ld a0, %lo(.LCPI42_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_1)
; RV64-NEXT:    ld a1, %lo(.LCPI42_1)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    lui a0, %hi(.LCPI42_2)
; RV64-NEXT:    ld a0, %lo(.LCPI42_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_3)
; RV64-NEXT:    ld a1, %lo(.LCPI42_3)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true)
  ret <vscale x 4 x i64> %a
}

; llvm.ctlz on <vscale x 8 x i64>, i1 operand true.
define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI43_0)
; RV64-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_1)
; RV64-NEXT:    ld a1, %lo(.LCPI43_1)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    lui a0, %hi(.LCPI43_2)
; RV64-NEXT:    ld a0, %lo(.LCPI43_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_3)
; RV64-NEXT:    ld a1, %lo(.LCPI43_3)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
  ret <vscale x 8 x i64> %a
}