; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-NEXT:    vand.vi v10, v8, 15
; CHECK-NEXT:    vsll.vi v10, v10, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vand.vi v12, v8, 15
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vand.vi v16, v8, 15
; CHECK-NEXT:    vsll.vi v16, v16, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

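; Note on the wider-element cases below: bitreverse is expanded as a byte
; swap (shifts and ors) followed by nibble, 2-bit and 1-bit swaps using the
; 0x0F0F..., 0x3333... and 0x5555... masks. RV32 materializes the masks with
; lui+addi and RV64 with lui+addiw; for the i64 element cases RV32 splats the
; 64-bit masks from the stack with vlse64 while RV64 loads them from the
; constant pool.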
define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; RV32-LABEL: bitreverse_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; RV32-LABEL: bitreverse_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; RV32-LABEL: bitreverse_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; RV32-LABEL: bitreverse_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; RV32-LABEL: bitreverse_nxv16i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: bitreverse_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; RV32-LABEL: bitreverse_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; RV32-LABEL: bitreverse_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; RV32-LABEL: bitreverse_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vsll.vi v12, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; RV32-LABEL: bitreverse_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v16, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: bitreverse_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v24, v24, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
; RV32-NEXT:    vsrl.vx v9, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v10, v8, a3
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsrl.vi v11, v8, 24
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v11, (a0), zero
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    vsll.vx v10, v8, a2
; RV32-NEXT:    vsll.vx v12, v8, a3
; RV32-NEXT:    vand.vv v11, v12, v11
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v11, (a0), zero
; RV32-NEXT:    vsll.vi v13, v8, 8
; RV32-NEXT:    vand.vv v12, v13, v12
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v11, (a0), zero
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV64-NEXT:    vsrl.vx v9, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vsll.vi v11, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_1)
; RV64-NEXT:    ld a0, %lo(.LCPI18_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v12, v8, a3
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsrl.vi v14, v8, 24
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v14, (a0), zero
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vsll.vx v12, v8, a2
; RV32-NEXT:    vsll.vx v16, v8, a3
; RV32-NEXT:    vand.vv v14, v16, v14
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v14, (a0), zero
; RV32-NEXT:    vsll.vi v18, v8, 8
; RV32-NEXT:    vand.vv v16, v18, v16
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v14, (a0), zero
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vand.vv v10, v10, v14
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vsll.vi v14, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_1)
; RV64-NEXT:    ld a0, %lo(.LCPI19_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsrl.vi v20, v8, 24
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v20, (a0), zero
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsll.vx v16, v8, a2
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v20, v24, v20
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v20, (a0), zero
; RV32-NEXT:    vsll.vi v28, v8, 8
; RV32-NEXT:    vand.vv v24, v28, v24
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v20, (a0), zero
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v12, v12, v20
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vsll.vi v20, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_1)
; RV64-NEXT:    ld a0, %lo(.LCPI20_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
; RV32-NEXT:    vsrl.vx v16, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v24, v8, a3
; RV32-NEXT:    addi a4, sp, 8
; RV32-NEXT:    vlse64.v v0, (a4), zero
; RV32-NEXT:    vand.vx v24, v24, a1
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsrl.vi v0, v8, 24
; RV32-NEXT:    vand.vx v0, v0, a0
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsll.vx v24, v8, a2
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v0, (a0), zero
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 2
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 1
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v24, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsll.vi v0, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_1)
; RV64-NEXT:    ld a0, %lo(.LCPI21_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)