; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-NEXT:    vand.vi v10, v8, 15
; CHECK-NEXT:    vsll.vi v10, v10, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vand.vi v12, v8, 15
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vand.vi v16, v8, 15
; CHECK-NEXT:    vsll.vi v16, v16, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; RV32-LABEL: bitreverse_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; RV32-LABEL: bitreverse_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; RV32-LABEL: bitreverse_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; RV32-LABEL: bitreverse_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; RV32-LABEL: bitreverse_nxv16i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: bitreverse_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; RV32-LABEL: bitreverse_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; RV32-LABEL: bitreverse_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; RV32-LABEL: bitreverse_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vsll.vi v12, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; RV32-LABEL: bitreverse_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v16, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: bitreverse_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v24, v24, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
; RV32-NEXT:    vsrl.vx v9, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v10, v8, a3
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsrl.vi v11, v8, 24
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    vsll.vx v10, v8, a2
; RV32-NEXT:    vsll.vx v12, v8, a3
; RV32-NEXT:    vand.vv v11, v12, v11
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vsll.vi v13, v8, 8
; RV32-NEXT:    vand.vv v12, v13, v12
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV64-NEXT:    vsrl.vx v9, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vsll.vi v11, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_1)
; RV64-NEXT:    ld a0, %lo(.LCPI18_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v12, v8, a3
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsrl.vi v14, v8, 24
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vsll.vx v12, v8, a2
; RV32-NEXT:    vsll.vx v16, v8, a3
; RV32-NEXT:    vand.vv v14, v16, v14
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vsll.vi v18, v8, 8
; RV32-NEXT:    vand.vv v16, v18, v16
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vand.vv v10, v10, v14
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vsll.vi v14, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_1)
; RV64-NEXT:    ld a0, %lo(.LCPI19_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsrl.vi v20, v8, 24
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsll.vx v16, v8, a2
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v20, v24, v20
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vsll.vi v28, v8, 8
; RV32-NEXT:    vand.vv v24, v28, v24
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v12, v12, v20
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vsll.vi v20, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_1)
; RV64-NEXT:    ld a0, %lo(.LCPI20_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsrl.vx v0, v8, a2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vand.vv v24, v0, v24
; RV32-NEXT:    vsrl.vi v0, v8, 24
; RV32-NEXT:    vand.vx v0, v0, a0
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsll.vx v24, v8, a2
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 2
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 1
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v24, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsll.vi v0, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_1)
; RV64-NEXT:    ld a0, %lo(.LCPI21_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)